File Coverage

blib/lib/Data/FlexSerializer.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Data::FlexSerializer;
2 5     5   393788 use Moose;
  0            
  0            
3             use MooseX::ClassAttribute;
4             use MooseX::Types::Moose qw(ArrayRef HashRef Maybe Bool Int Str Object CodeRef);
5             use MooseX::Types::Structured qw(Dict Tuple Map);
6             use MooseX::Types -declare => [ qw(
7             FormatHandler
8             FormatBool
9             ) ];
10             use autodie;
11              
12             our $VERSION = '1.10';
13              
14             # Get the DEBUG constant from $Data::FlexSerializer::DEBUG or
15             # $ENV{DATA_FLEXSERIALIZER_DEBUG}
16             use Constant::FromGlobal DEBUG => { int => 1, default => 0, env => 1 };
17              
18             use List::Util qw(min);
19             use Storable qw();
20             use JSON::XS qw();
21             use Sereal::Decoder qw();
22             use Sereal::Encoder qw();
23             use Compress::Zlib qw(Z_DEFAULT_COMPRESSION);
24             use IO::Uncompress::AnyInflate qw();
25             use Carp ();
26             use Data::Dumper qw(Dumper);
27              
28             subtype FormatHandler,
29             as Dict [
30             detect => CodeRef,
31             serialize => CodeRef,
32             deserialize => CodeRef,
33             ],
34             message { 'A format needs to be passed as an hashref with "serialize", "deserialize" and "detect" keys that point to a coderef to perform the respective action' };
35              
36             subtype FormatBool,
37             as Map[Str, Bool];
38              
39             coerce FormatBool,
40             from ArrayRef,
41             via { { map lc $_ => 1, @$_ } },
42             from Str,
43             via { { lc $_ => 1 } },
44             ;
45              
46             class_has formats => (
47             traits => ['Hash'],
48             is => 'rw',
49             isa => HashRef[FormatHandler],
50             default => sub {
51             {
52             json => {
53             detect => sub { $_[1] =~ /^(?:\{|\[)/ },
54             serialize => sub { shift; goto \&JSON::XS::encode_json },
55             deserialize => sub { shift; goto \&JSON::XS::decode_json },
56             },
57             storable => {
58             detect => sub { $_[1] =~ s/^pst0// }, # this is not a real detector.
59             # It just removes the storable
60             # file magic if necessary.
61             # Tho' storable needs to be last
62             serialize => sub { shift; goto \&Storable::nfreeze },
63             deserialize => sub { shift; goto \&Storable::thaw },
64             },
65             sereal => {
66             detect => sub { shift->{sereal_decoder}->looks_like_sereal(@_) },
67             serialize => sub { shift->{sereal_encoder}->encode(@_) },
68             deserialize => sub { my $structure; shift->{sereal_decoder}->decode($_[0], $structure); $structure },
69             },
70             }
71             },
72             handles => {
73             add_format => 'set',
74             get_format => 'get',
75             has_format => 'exists',
76             supported_formats => 'keys',
77             },
78             );
79              
80             has output_format => (
81             is => 'ro',
82             isa => Str,
83             default => 'json',
84             );
85              
86             has detect_formats => (
87             traits => ['Hash'],
88             is => 'ro',
89             isa => FormatBool,
90             default => sub { { json => 1, sereal => 0, storable => 0 } },
91             coerce => 1,
92             handles => {
93             detect_json => [ get => 'json' ],
94             detect_storable => [ get => 'storable' ],
95             detect_sereal => [ get => 'sereal' ],
96             _set_detect_json => [ set => 'json' ],
97             _set_detect_storable => [ set => 'storable' ],
98             _set_detect_sereal => [ set => 'sereal' ],
99             list_detect_formats => 'kv',
100             }
101             );
102              
103             has assume_compression => (
104             is => 'ro',
105             isa => Bool,
106             default => 1,
107             );
108              
109             has detect_compression => (
110             is => 'ro',
111             isa => Bool,
112             default => 0,
113             );
114              
115             has compress_output => (
116             is => 'ro',
117             isa => Bool,
118             default => 1,
119             );
120              
121             has compression_level => (
122             is => 'ro',
123             isa => Maybe[Int],
124             );
125              
126             has sereal_encoder => (
127             is => 'ro',
128             isa => Object,
129             lazy_build => 1,
130             );
131              
132             sub _build_sereal_encoder { Sereal::Encoder->new }
133              
134             has sereal_decoder => (
135             is => 'ro',
136             isa => Object,
137             lazy_build => 1,
138             );
139              
140             sub _build_sereal_decoder { Sereal::Decoder->new }
141              
142             around BUILDARGS => sub {
143             my ( $orig, $class, %args ) = @_;
144              
145             # We change the default on assume_compression to "off" if the
146             # user sets detect_compression explicitly
147             if (exists $args{detect_compression} and
148             not exists $args{assume_compression}) {
149             $args{assume_compression} = 0;
150             }
151              
152             if ($args{assume_compression} and $args{detect_compression}) {
153             die "Can't assume compression and auto-detect compression at the same time. That makes no sense.";
154             }
155              
156             my %detect_formats = map {
157             exists $args{"detect_$_"} ? ($_ => $args{"detect_$_"}) : ()
158             } $class->supported_formats;
159              
160             if (%detect_formats) {
161             if ($args{detect_formats}) {
162             $args{detect_formats} = [ $args{detect_formats} ] unless ref $args{detect_formats};
163             if (ref $args{detect_formats} eq 'ARRAY') {
164             for my $format (@{$args{detect_formats}}) {
165             die "Can't have $format in detect_formats and detect_$format set to false at the same time"
166             if exists $detect_formats{$format} && !$detect_formats{$format};
167             $detect_formats{$format} = 1;
168             }
169             } else {
170             for my $format (keys %{$args{detect_formats}}) {
171             die "Can't have $format in detect_formats and detect_$format set to false at the same time"
172             if exists $detect_formats{$format}
173             && exists $args{detect_formats}{$format}
174             && $detect_formats{$format} != $args{detect_formats}{$format};
175             $detect_formats{$format} = 1;
176             }
177             }
178             } else {
179             $args{detect_formats} = \%detect_formats;
180             }
181             }
182              
183             $args{output_format} = lc $args{output_format} if $args{output_format};
184              
185             for my $format (
186             ( $args{output_format} ? $args{output_format} : () ),
187             ( $args{detect_formats} ? keys %{$args{detect_formats}} : () )) {
188             die "'$format' is not a supported format" unless $class->has_format($format);
189             }
190              
191             my $rv = $class->$orig(%args);
192              
193             if (DEBUG) {
194             warn "Dumping the new FlexSerializer object.\n" . Dumper($rv);
195             }
196              
197             return $rv;
198             };
199              
200             sub BUILD {
201             my ($self) = @_;
202              
203             # build Sereal::{Decoder,Encoder} objects if necessary
204             $self->sereal_decoder if $self->detect_sereal;
205             $self->sereal_encoder if $self->output_format eq 'sereal';
206              
207             # For legacy reasons json should be on by default
208             $self->_set_detect_json(1) unless defined $self->detect_json;
209              
210             $self->{serializer_coderef} = $self->make_serializer;
211             $self->{deserializer_coderef} = $self->make_deserializer;
212              
213             return;
214             }
215              
216             sub serialize { goto $_[0]->{serializer_coderef} }
217             sub deserialize { goto $_[0]->{deserializer_coderef} }
218              
219             sub make_serializer {
220             my $self = shift;
221             my $compress_output = $self->compress_output;
222             my $output_format = $self->output_format;
223             my $comp_level;
224             $comp_level = $self->compression_level if $compress_output;
225              
226             if (DEBUG) {
227             warn(sprintf(
228             "FlexSerializer using the following options for serialization: "
229             . "compress_output=%s, compression_level=%s, output_format=%s",
230             map {defined $self->{$_} ? $self->{$_} : '<undef>'}
231             qw(compress_output compression_level output_format)
232             ));
233             }
234              
235             {
236             no strict 'refs';
237             my $class = ref $self;
238             *{"$class\::__serialize_$output_format"} =
239             $self->get_format($output_format)->{serialize}
240             or die "PANIC: unknown output format '$output_format'";
241             }
242              
243             my $code = "__serialize_$output_format(\$self, \$_)";
244              
245             if ($compress_output) {
246             my $comp_level_code = defined $comp_level ? $comp_level : 'Z_DEFAULT_COMPRESSION';
247             $code = "Compress::Zlib::compress(\\$code,$comp_level_code)";
248             }
249              
250             $code = sprintf q{
251             sub {
252             # local *__ANON__= "__ANON__serialize__";
253             my $self = shift;
254              
255             my @out;
256             push @out, %s for @_;
257              
258             return wantarray ? @out
259             : @out > 1 ? die( sprintf "You have %%d serialized structures, please call this method in list context", scalar @out )
260             : $out[0];
261              
262             return @out;
263             };
264             }, $code;
265              
266             warn $code if DEBUG >= 2;
267              
268             my $coderef = eval $code or do{
269             my $error = $@ || 'Zombie error';
270             die "Couldn't create the deserialization coderef: $error\n The code is: $code\n";
271             };
272              
273             return $coderef;
274             }
275              
276             sub make_deserializer {
277             my $self = shift;
278              
279             my $assume_compression = $self->assume_compression;
280             my $detect_compression = $self->detect_compression;
281              
282             my %detectors = %{$self->detect_formats};
283              
284             # Move storable to the end of the detectors list.
285             # We don't know how to detect it.
286             delete $detectors{storable} if exists $detectors{storable};
287             my @detectors = grep $detectors{$_}, $self->supported_formats;
288             push @detectors, 'storable' if $self->detect_storable;
289              
290             if (DEBUG) {
291             warn "Detectors: @detectors";
292             warn("FlexSerializer using the following options for deserialization: ",
293             join ', ', (map {defined $self->$_ ? "$_=@{[$self->$_]}" : "$_=<undef>"}
294             qw(assume_compression detect_compression)),
295             map { "detect_$_->[0]=$_->[1]" } $self->list_detect_formats
296             );
297             }
298              
299             my $uncompress_code;
300             if ($assume_compression) {
301             $uncompress_code = '
302             local $_ = Compress::Zlib::uncompress(\$serialized);
303             unless (defined $_) {
304             die "You\'ve told me to assume compression but calling uncompress() on your input string returns undef";
305             }';
306             }
307             elsif ($detect_compression) {
308             $uncompress_code = '
309             local $_;
310             my $inflatedok = IO::Uncompress::AnyInflate::anyinflate(\$serialized => \$_);
311             warn "FlexSerializer: Detected that the input was " . ($inflatedok ? "" : "not ") . "compressed"
312             if DEBUG >= 3;
313             $_ = $serialized if not $inflatedok;';
314             }
315             else {
316             warn "FlexSerializer: Not using compression" if DEBUG;
317             $uncompress_code = '
318             local $_ = $serialized;';
319             }
320              
321             my $code_detect = q!
322             warn "FlexSerializer: %2$s that the input was %1$s" if DEBUG >= 3;
323             warn sprintf "FlexSerializer: This was the %1$s input: '%s'",
324             substr($_, 0, min(length($_), 100)) if DEBUG >= 3;
325             push @out, __deserialize_%1$s($self, $_)!;
326              
327             my $detector = '__detect_%1$s($self, $_)';
328             my $body = "\n$code_detect\n }";
329              
330             my $code = @detectors == 1
331             # Just one detector => skip the if()else gobbledigook
332             ? sprintf $code_detect, $detectors[0], 'Assuming'
333             # Multiple detectors
334             : join('', map {
335             sprintf(
336             ($_ == 0 ? "if ( $detector ) { $body"
337             :$_ == $#detectors ? " else { $detector; $body"
338             : " elsif ( $detector ) { $body"),
339             $detectors[$_],
340             ($_ == $#detectors ? 'Assuming' : 'Detected'),
341             );
342             } 0..$#detectors
343             );
344              
345             $code = sprintf(q{
346             sub {
347             # local *__ANON__= "__ANON__deserialize__";
348             my $self = shift;
349              
350             my @out;
351             for my $serialized (@_) {
352             %s
353              
354             %s
355             }
356              
357             return wantarray ? @out
358             : @out > 1 ? die( sprintf "You have %%d deserialized structures, please call this method in list context", scalar @out )
359             : $out[0];
360              
361             return @out;
362             };},
363             $uncompress_code, $code
364             );
365              
366             warn $code if DEBUG >= 2;
367              
368             # inject the deserializers and detectors in the symbol table
369             # before we eval the code.
370             for (@detectors) {
371             my $class = ref $self;
372             no strict 'refs';
373             my $format = $self->get_format($_);
374             *{"$class\::__deserialize_$_"} = $format->{deserialize};
375             *{"$class\::__detect_$_"} = $format->{detect};
376             }
377              
378             my $coderef = eval $code or do{
379             my $error = $@ || 'Clobbed';
380             die "Couldn't create the deserialization coderef: $error\n The code is: $code\n";
381             };
382              
383             return $coderef;
384             }
385              
386             sub deserialize_from_file {
387             my $self = shift;
388             my $file = shift;
389              
390             if (not defined $file or not -r $file) {
391             Carp::croak("Need filename argument or can't read file");
392             }
393              
394             open my $fh, '<', $file;
395             local $/;
396             my $data = <$fh>;
397             my ($rv) = $self->deserialize($data);
398             return $rv;
399             }
400              
401             sub serialize_to_file {
402             my $self = shift;
403             my $data = shift;
404             my $file = shift;
405              
406             if (not defined $file) {
407             Carp::croak("Need filename argument");
408             }
409              
410             open my $fh, '>', $file;
411             print $fh $self->serialize($data);
412             close $fh;
413              
414             return 1;
415             }
416              
417             sub deserialize_from_fh {
418             my $self = shift;
419             my $fd = shift;
420              
421             if (not defined $fd) {
422             Carp::croak("Need file descriptor argument");
423             }
424              
425             local $/;
426             my $data = <$fd>;
427             my ($rv) = $self->deserialize($data);
428              
429             return $rv;
430             }
431              
432             sub serialize_to_fh {
433             my $self = shift;
434             my $data = shift;
435             my $fd = shift;
436              
437             if (not defined $fd) {
438             Carp::croak("Need file descriptor argument");
439             }
440              
441             print $fd $self->serialize($data);
442              
443             return 1;
444             }
445              
446              
447             1;
448              
449             __END__
450              
451             =pod
452              
453             =encoding utf8
454              
455             =head1 NAME
456              
457             Data::FlexSerializer - Pluggable (de-)serialization to/from compressed/uncompressed JSON/Storable/Sereal/Whatever
458              
459             =head1 DESCRIPTION
460              
461             This module was written to convert away from Storable throughout the
462             Booking.com codebase to other serialization formats such as Sereal and
463             JSON.
464              
465             Since we needed to do these migrations in production we had to do them
466             with zero downtime and deal with data stored on disk, in memcached or
467             in a database that we could only gradually migrate to the new format
468             as we read/wrote it.
469              
470             So we needed a module that deals with dynamically detecting what kind
471             of existing serialized data you have, and can dynamically convert it
472             to something else as it's written again.
473              
474             That's what this module does. Depending on the options you give it it
475             can read/write any combination of
476             B<compressed>/B<uncompressed>/B<maybe compressed>
477             B<Storable>/B<JSON>/B<Sereal> data. You can also easily extend it to
478             add support for your own input/output format in addition to the
479             defaults.
480              
481             =head1 SYNOPSIS
482              
483             When we originally wrote this we meant to convert everything over from
484             Storable to JSON. Since then mostly due to various issues with JSON
485             not accurately being able to represent Perl datastructures
486             (e.g. preserve encoding flags) we've started to migrate to
487             L<Sereal::Encoder|Sereal> (a L<new serialization
488             format|http://blog.booking.com/sereal-a-binary-data-serialization-format.html>
489             we wrote) instead.
490              
491             However the API of this module is now slightly awkward because now it
492             needs to deal with the possible detection and emission of multiple
493             formats, and it still uses the JSON format by default which is no
494             longer the recommended way to use it.
495              
496             # For all of the below
497             use Data::FlexSerializer;
498              
499             =head2 Reading and writing compressed JSON
500              
501             # We *only* read/write compressed JSON by default:
502             my $strict_serializer = Data::FlexSerializer->new;
503             my @blobs = $strict_serializer->serialize(@perl_datastructures);
504             my @perl_datastructures = $strict_serializer->deserialize(@blobs);
505              
506             =head2 Reading maybe compressed JSON and writing compressed JSON
507              
508             # We can optionally detect compressed JSON as well, will accept
509             # mixed compressed/uncompressed data. This works for all the input
510             # formats.
511             my $lax_serializer = Data::FlexSerializer->new(
512             detect_compression => 1,
513             );
514              
515             =head2 Reading definitely compressed JSON and writing compressed JSON
516              
517             # If we know that all our data is compressed we can skip the
518             # detection step. This works for all the input formats.
519             my $lax_compress = Data::FlexSerializer->new(
520             assume_compression => 1,
521             compress_output => 1, # This is the default
522             );
523              
524             =head2 Migrate from maybe compressed Storable to compressed JSON
525              
526             my $storable_to_json = Data::FlexSerializer->new(
527             detect_compression => 1, # check whether the input is compressed
528             detect_storable => 1, # accept Storable images as input
529             compress_output => 1, # This is the default
530             );
531              
532             =head2 Migrate from maybe compressed JSON to Sereal
533              
534             my $storable_to_sereal = Data::FlexSerializer->new(
535             detect_sereal => 1,
536             output_format => 'sereal',
537             );
538              
539             =head2 Migrate from Sereal to JSON
540              
541             my $sereal_backcompat = Data::FlexSerializer->new(
542             detect_sereal => 1, # accept Sereal images as input
543             );
544              
545             =head2 Migrate from JSON OR Storable to Sereal
546              
547             my $flex_to_json = Data::FlexSerializer->new(
548             detect_compression => 1,
549             detect_json => 1, # this is the default
550             detect_sereal => 1,
551             detect_storable => 1,
552             output_format => 'sereal',
553             );
554              
555             =head2 Migrate from JSON OR Storable to Sereal with custom Sereal objects
556              
557             my $flex_to_json = Data::FlexSerializer->new(
558             detect_compression => 1,
559             detect_json => 1, # this is the default
560             detect_sereal => 1,
561             detect_storable => 1,
562             output_format => 'sereal',
563             sereal_decoder => Sereal::Decoder->new(...),
564             sereal_encoder => Sereal::Encoder->new(...),
565             );
566              
567             =head2 Add your own format using Data::Dumper.
568              
569             See L<the documentation for add_format|add_format> below.
570              
571             =head1 ATTRIBUTES
572              
573             This is a L<Moose>-powered module so all of these are keys you can
574             pass to L</new>. They're all read-only after the class is constructed,
575             so you can look but you can't touch.
576              
577             =head1 METHODS
578              
579             =head2 assume_compression
580              
581             C<assume_compression> is a boolean flag that makes the deserialization
582             assume that the data will be compressed. It won't have to guess,
583             making the deserialization faster. Defaults to true.
584              
585             You almost definitely want to turn L</compress_output> off too if you
586             turn this off, unless you're doing a one-off migration or something.
587              
588             =head2 detect_compression
589              
590             C<detect_compression> is a boolean flag that also affects only the
591             deserialization step.
592              
593             If set, it'll auto-detect whether the input is compressed. Mutually
594             exclusive with C<assume_compression> (we'll die if you try to set
595             both).
596              
597             If you set C<detect_compression> we'll disable this for you, since it
598             doesn't make any sense to try to detect when you're going to assume.
599              
600             Defaults to false.
601              
602             =head2 compress_output
603              
604             C<compress_output> is a flag indicating whether compressed or uncompressed
605             dumps are to be generated during the serialization. Defaults to true.
606              
607             You probably to turn L</assume_compression> off too if you turn this
608             off, unless you're doing a one-off migration or something.
609              
610             =head2 compression_level
611              
612             C<compression_level> is an integer indicating the compression level (0-9).
613              
614             =head2 output_format
615              
616             C<output_format> can be either set to the string C<json> (default),
617             C<storable>, C<sereal> or your own format that you've added via L</add_format>.
618              
619             =head2 detect_FORMAT_NAME
620              
621             Whether we should detect this incoming format. By default only
622             C<detect_json> is true. You can also set C<detect_storable>,
623             C<detect_sereal> or C<detect_YOUR_FORMAT> for formats added via
624             L</add_format>.
625              
626             =head2 sereal_encoder
627              
628             =head2 sereal_decoder
629              
630             You can supply C<sereal_encoder> or C<sereal_decoder> arguments with
631             your own Serial decoder/encoder objects. Handy if you want to pass
632             custom options to the encoder or decoder.
633              
634             By default we create objects for you at BUILD time. So you don't need
635             to supply this for optimization purposes either.
636              
637             =head1 METHODS
638              
639             =head2 serialize
640              
641             Given a list of things to serialize, this does the job on each of them and
642             returns a list of serialized blobs.
643              
644             In scalar context, this will return a single serialized blob instead of a
645             list. If called in scalar context, but passed a list of things to serialize,
646             this will croak because the call makes no sense.
647              
648             =head2 deserialize
649              
650             The opposite of C<serialize>, doh.
651              
652             =head2 deserialize_from_file
653              
654             Given a (single!) file name, reads the file contents and deserializes them.
655             Returns the resulting Perl data structure.
656              
657             Since this works on one file at a time, this doesn't return a list of
658             data structures like C<deserialize()> does.
659              
660             =head2 serialize_to_file
661              
662             $serializer->serialize_to_file(
663             $data_structure => '/tmp/foo/bar'
664             );
665              
666             Given a (single!) Perl data structure, and a (single!) file name,
667             serializes the data structure and writes the result to the given file.
668             Returns true on success, dies on failure.
669              
670             =head1 CLASS METHODS
671              
672             =head2 add_format
673              
674             C<add_format> class method to add support for custom formats.
675              
676             Data::FlexSerializer->add_format(
677             data_dumper => {
678             serialize => sub { shift; goto \&Data::Dumper::Dumper },
679             deserialize => sub { shift; my $VAR1; eval "$_[0]" },
680             detect => sub { $_[1] =~ /\$[\w]+\s*=/ },
681             }
682             );
683              
684             my $flex_to_dd = Data::FlexSerializer->new(
685             detect_data_dumper => 1,
686             output_format => 'data_dumper',
687             );
688              
689             =head1 AUTHOR
690              
691             Steffen Mueller <smueller@cpan.org>
692              
693             Ævar Arnfjörð Bjarmason <avar@cpan.org>
694              
695             Burak Gürsoy <burak@cpan.org>
696              
697             Elizabeth Matthijsen <liz@dijkmat.nl>
698              
699             Caio Romão Costa Nascimento <cpan@caioromao.com>
700              
701             Jonas Galhordas Duarte Alves <jgda@cpan.org>
702              
703             =head1 ACKNOWLEDGMENT
704              
705             This module was originally developed at and for Booking.com.
706             With approval from Booking.com, this module was generalized
707             and put on CPAN, for which the authors would like to express
708             their gratitude.
709              
710             =head1 COPYRIGHT AND LICENSE
711              
712             (C) 2011, 2012, 2013 Steffen Mueller and others. All rights reserved.
713              
714             This code is available under the same license as Perl version
715             5.8.1 or higher.
716              
717             This program is distributed in the hope that it will be useful,
718             but WITHOUT ANY WARRANTY; without even the implied warranty of
719             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
720              
721             =cut