File Coverage

blib/lib/Data/Clean/ForJSON.pm
Criterion Covered Total %
statement 35 37 94.5
branch 1 2 50.0
condition 11 23 47.8
subroutine 9 10 90.0
pod 4 4 100.0
total 60 76 78.9


line stmt bran cond sub pod time code
1             package Data::Clean::ForJSON;
2              
3             our $DATE = '2019-11-26'; # DATE
4             our $VERSION = '0.395'; # VERSION
5              
6 1     1   84150 use 5.010001;
  1         11  
7 1     1   5 use strict;
  1         1  
  1         17  
8 1     1   4 use warnings;
  1         1  
  1         22  
9              
10 1     1   357 use parent qw(Data::Clean);
  1         255  
  1         4  
11 1     1   3990 use vars qw($creating_singleton);
  1         2  
  1         34  
12              
13 1     1   5 use Exporter qw(import);
  1         2  
  1         200  
14             our @EXPORT_OK = qw(
15             clean_json_in_place
16             clone_and_clean_json
17             );
18              
19             sub new {
20 1     1 1 4 my ($class, %opts) = @_;
21              
22 1 50 33     8 if (!%opts && !$creating_singleton) {
23 0         0 warn "You are creating a new ".__PACKAGE__." object without customizing options. ".
24             "You probably want to call get_cleanser() yet to get a singleton instead?";
25             }
26              
27 1   50     7 $opts{DateTime} //= [call_method => 'epoch'];
28 1   50     6 $opts{'Time::Moment'} //= [call_method => 'epoch'];
29 1   50     6 $opts{'Math::BigInt'} //= [call_method => 'bstr'];
30 1   50     5 $opts{Regexp} //= ['stringify'];
31 1   50     7 $opts{version} //= ['stringify'];
32              
33 1   50     5 $opts{SCALAR} //= ['deref_scalar'];
34 1   50     5 $opts{-ref} //= ['replace_with_ref'];
35 1   50     5 $opts{-circular} //= ['clone'];
36 1   50     4 $opts{-obj} //= ['unbless'];
37              
38 1   50     5 $opts{'!recurse_obj'} //= 1;
39 1         14 $class->SUPER::new(%opts);
40             }
41              
42             sub get_cleanser {
43 2     2 1 101 my $class = shift;
44 2         4 local $creating_singleton = 1;
45 2         5 state $singleton = $class->new;
46 2         2721 $singleton;
47             }
48              
49             sub clean_json_in_place {
50 1     1 1 10488 __PACKAGE__->get_cleanser->clean_in_place(@_);
51             }
52              
53             sub clone_and_clean_json {
54 0     0 1   __PACKAGE__->get_cleanser->clone_and_clean(@_);
55             }
56              
57             1;
58             # ABSTRACT: Clean data so it is safe to output to JSON
59              
60             __END__
61              
62             =pod
63              
64             =encoding UTF-8
65              
66             =head1 NAME
67              
68             Data::Clean::ForJSON - Clean data so it is safe to output to JSON
69              
70             =head1 VERSION
71              
72             This document describes version 0.395 of Data::Clean::ForJSON (from Perl distribution Data-Clean-ForJSON), released on 2019-11-26.
73              
74             =head1 SYNOPSIS
75              
76             use Data::Clean::ForJSON;
77             my $cleanser = Data::Clean::ForJSON->get_cleanser;
78             my $data = { code=>sub {}, re=>qr/abc/i };
79              
80             my $cleaned;
81              
82             # modifies data in-place
83             $cleaned = $cleanser->clean_in_place($data);
84              
85             # same, but deep-clones the data first and returns the cleaned copy
86             $cleaned = $cleanser->clone_and_clean($data);
87              
88             # now output it
89             use JSON;
90             print encode_json($cleaned); # prints '{"code":"CODE","re":"(?^i:abc)"}'
91              
92             Functional shortcuts:
93              
94             use Data::Clean::ForJSON qw(clean_json_in_place clone_and_clean_json);
95              
96             # equivalent to Data::Clean::ForJSON->get_cleanser->clean_in_place($data)
97             clean_json_in_place($data);
98              
99             # equivalent to Data::Clean::ForJSON->get_cleanser->clone_and_clean($data)
100             $cleaned = clone_and_clean_json($data);
101              
102             =head1 DESCRIPTION
103              
104             This class cleans data from anything that might be problematic when encoding to
105             JSON. This includes coderefs, globs, and so on. Here's what it will do by
106             default:
107              
108             =over
109              
110             =item * Change DateTime and Time::Moment objects to their epoch values
111              
112             =item * Change Regexp and version objects to their string values
113              
114             =item * Change scalar references (e.g. \1) to its scalar value (e.g. 1)
115              
116             =item * Change other references (non-hash, non-array) to its ref() value (e.g. "GLOB", "CODE")
117              
118             =item * Clone circular references
119              
120             With a default limit of 1, meaning that the first time a reference is seen
121             again, it will be cloned. But if it is seen again a second time, it will be
122             replaced with "CIRCULAR".
123              
124             To change the default limit, customize your cleanser object:
125              
126             $cleanser = Data::Clean::ForJSON->new(
127             -circular => ["clone", 4],
128             );
129              
130             or you can perform other action for circular references, see L<Data::Clean> for
131             more details.
132              
133             =item * Unbless other types of objects
134              
135             =back
136              
137             Cleaning recurses into objects.
138              
139             Data that has been cleaned will probably not be convertible back to the
140             original, due to information loss (for example, coderefs converted to string
141             C<"CODE">).
142              
143             The design goals are good performance, good defaults, and just enough
144             flexibility. The original use case is returning JSON responses from an HTTP API
145             service.
146              
147             This module is significantly faster than modules like L<Data::Rmap> or
148             L<Data::Visitor::Callback> because with something like Data::Rmap you repeatedly
149             invoke callback for each data item. This module, on the other hand, generates a
150             cleanser code using eval(), using native Perl for() loops.
151              
152             If the C<LOG_CLEANSER_CODE> environment variable is set to true, the generated cleanser code
153             will be logged using L<Log::ger> at trace level. You can see it, e.g. using
154             L<Log::ger::Output::Screen>:
155              
156             % LOG_CLEANSER_CODE=1 perl -MLog::ger::Output=Screen -MLog::ger::Level::trace -MData::Clean::ForJSON \
157             -e'$c=Data::Clean::ForJSON->new; ...'
158              
159             =head1 FUNCTIONS
160              
161             None of the functions are exported by default.
162              
163             =head2 clean_json_in_place($data)
164              
165             A shortcut for:
166              
167             Data::Clean::ForJSON->get_cleanser->clean_in_place($data)
168              
169             =head2 clone_and_clean_json($data) => $cleaned
170              
171             A shortcut for:
172              
173             $cleaned = Data::Clean::ForJSON->get_cleanser->clone_and_clean($data)
174              
175             =head1 METHODS
176              
177             =head2 CLASS->get_cleanser => $obj
178              
179             Return a singleton instance, with default options. Use C<new()> if you want to
180             customize options.
181              
182             =head2 CLASS->new() => $obj
183              
184             Create a new instance.
185              
186             =head2 $obj->clean_in_place($data) => $cleaned
187              
188             Clean $data. Modify data in-place.
189              
190             =head2 $obj->clone_and_clean($data) => $cleaned
191              
192             Clean $data. Clone $data first.
193              
194             =head1 FAQ
195              
196             =head2 Why clone/modify? Why not directly output JSON?
197              
198             So that the data can be used for other things, such as outputting to YAML.
199              
200             =head2 Why is it slow?
201              
202             If you use C<new()> instead of C<get_cleanser()>, make sure that you do not
203             construct the Data::Clean::ForJSON object repeatedly, as the constructor
204             generates the cleanser code first using eval(). A short benchmark (run on my
205             slow Atom netbook):
206              
207             % bench -MData::Clean::ForJSON -b'$c=Data::Clean::ForJSON->new' \
208             'Data::Clean::ForJSON->new->clone_and_clean([1..100])' \
209             '$c->clone_and_clean([1..100])'
210             Benchmarking sub { Data::Clean::ForJSON->new->clean_in_place([1..100]) }, sub { $c->clean_in_place([1..100]) } ...
211             a: 302 calls (291.3/s), 1.037s (3.433ms/call)
212             b: 7043 calls (4996/s), 1.410s (0.200ms/call)
213             Fastest is b (17.15x a)
214              
215             Second, you can turn off some checks if you are sure you will not be getting bad
216             data. For example, if you know that your input will not contain circular
217             references, you can turn off circular detection:
218              
219             $cleanser = Data::Clean::ForJSON->new(-circular => 0);
220              
221             Benchmark:
222              
223             $ perl -MData::Clean::ForJSON -MBench -E '
224             $data = [[1],[2],[3],[4],[5]];
225             bench {
226             circ => sub { state $c = Data::Clean::ForJSON->new; $c->clone_and_clean($data) },
227             nocirc => sub { state $c = Data::Clean::ForJSON->new(-circular=>0); $c->clone_and_clean($data) }
228             }, -1'
229             circ: 9456 calls (9425/s), 1.003s (0.106ms/call)
230             nocirc: 13161 calls (12885/s), 1.021s (0.0776ms/call)
231             Fastest is nocirc (1.367x circ)
232              
233             The fewer checks you do, the faster the cleansing process will be.
234              
235             =head2 Why am I getting 'Not a CODE reference at lib/Data/Clean.pm line xxx'?
236              
237             [2013-08-07 ] This error message is from Data::Clone::clone() when it is cloning
238             an object. If you are cleaning objects, instead of using clone_and_clean(), try
239             using clean_in_place(). Or, clone your data first using something else like
240             L<Sereal>.
241              
242             =head1 ENVIRONMENT
243              
244             =head2 LOG_CLEANSER_CODE
245              
246             Bool. Can be set to true to log cleanser code using L<Log::ger> at C<trace>
247             level.
248              
249             =head1 HOMEPAGE
250              
251             Please visit the project's homepage at L<https://metacpan.org/release/Data-Clean-ForJSON>.
252              
253             =head1 SOURCE
254              
255             Source repository is at L<https://github.com/perlancar/perl-Data-Clean-ForJSON>.
256              
257             =head1 BUGS
258              
259             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Clean-ForJSON>
260              
261             When submitting a bug or request, please include a test-file or a
262             patch to an existing test-file that illustrates the bug or desired
263             feature.
264              
265             =head1 SEE ALSO
266              
267             L<Data::Rmap>
268              
269             L<Data::Visitor::Callback>
270              
271             L<Data::Abridge> is similar in goal, which is to let Perl data structures (which
272             might contain things unsupported in JSON) be encodeable to JSON. But unlike
273             Data::Clean::ForJSON, it has some (currently) non-configurable rules, like
274             changing a coderef with a hash C<< {CODE=>'\&main::__ANON__'} >> or a scalar ref
275             with C<< {SCALAR=>'value'} >> and so on. Note that the abridging process is
276             similarly unidirectional (you cannot convert back the original Perl data
277             structure).
278              
279             Some benchmarks in L<Bencher::Scenarios::DataCleansing>. You can see that
280             Data::Clean::ForJSON can be several times faster than, say, Data::Rmap.
281              
282             =head1 AUTHOR
283              
284             perlancar <perlancar@cpan.org>
285              
286             =head1 COPYRIGHT AND LICENSE
287              
288             This software is copyright (c) 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012 by perlancar@cpan.org.
289              
290             This is free software; you can redistribute it and/or modify it under
291             the same terms as the Perl 5 programming language system itself.
292              
293             =cut