File Coverage

blib/lib/Data/Clean.pm
Criterion Covered Total %
statement 153 176 86.9
branch 33 54 61.1
condition 11 21 52.3
subroutine 21 26 80.7
pod 3 15 20.0
total 221 292 75.6


line stmt bran cond sub pod time code
1             package Data::Clean;
2              
3             our $DATE = '2020-04-07'; # DATE
4             our $VERSION = '0.507'; # VERSION
5              
6 1     1   646 use 5.010001;
  1         10  
7 1     1   5 use strict;
  1         2  
  1         31  
8 1     1   6 use warnings;
  1         1  
  1         24  
9 1     1   1781 use Log::ger;
  1         65  
  1         5  
10              
11             sub new {
12 11     11 1 546762 my ($class, %opts) = @_;
13 11         34 my $self = bless {_opts=>\%opts}, $class;
14 11         38 log_trace("Cleanser options: %s", \%opts);
15              
16 11         40 my $cd = $self->_generate_cleanser_code;
17 9         16 for my $mod (keys %{ $cd->{modules} }) {
  9         31  
18 18         1260 (my $mod_pm = "$mod.pm") =~ s!::!/!g;
19 18         651 require $mod_pm;
20             }
21 9         23 $self->{_cd} = $cd;
22 9         4320 $self->{_code} = eval $cd->{src};
23             {
24 9 50       24 last unless $cd->{clone_func} =~ /(.+)::(.+)/;
  9         52  
25 9         48 (my $mod_pm = "$1.pm") =~ s!::!/!g;
26 9         46 require $mod_pm;
27             }
28 9 50       23 die "Can't generate code: $@" if $@;
29              
30 9         37 $self;
31             }
32              
33             sub command_call_method {
34 1     1 0 5 my ($self, $cd, $args) = @_;
35 1         3 my $mn = $args->[0];
36 1 50       28 die "Invalid method name syntax" unless $mn =~ /\A\w+\z/;
37 0         0 return "{{var}} = {{var}}->$mn; \$ref = ref({{var}})";
38             }
39              
40             sub command_call_func {
41 2     2 0 5 my ($self, $cd, $args) = @_;
42 2         4 my $fn = $args->[0];
43 2 100       36 die "Invalid func name syntax" unless $fn =~ /\A\w+(::\w+)*\z/;
44 1         5 return "{{var}} = $fn({{var}}); \$ref = ref({{var}})";
45             }
46              
47             sub command_one_or_zero {
48 0     0 0 0 my ($self, $cd, $args) = @_;
49 0         0 return "{{var}} = {{var}} ? 1:0; \$ref = ''";
50             }
51              
52             sub command_deref_scalar_one_or_zero {
53 0     0 0 0 my ($self, $cd, $args) = @_;
54 0         0 return "{{var}} = \${ {{var}} } ? 1:0; \$ref = ''";
55             }
56              
57             sub command_deref_scalar {
58 0     0 0 0 my ($self, $cd, $args) = @_;
59 0         0 return '{{var}} = ${ {{var}} }; $ref = ref({{var}})';
60             }
61              
62             sub command_stringify {
63 0     0 0 0 my ($self, $cd, $args) = @_;
64 0         0 return '{{var}} = "{{var}}"; $ref = ""';
65             }
66              
67             sub command_replace_with_ref {
68 2     2 0 8 my ($self, $cd, $args) = @_;
69 2         5 return '{{var}} = $ref; $ref = ""';
70             }
71              
72             sub command_replace_with_str {
73 3     3 0 556 require String::PerlQuote;
74              
75 3         612 my ($self, $cd, $args) = @_;
76 3         12 return "{{var}} = ".String::PerlQuote::double_quote($args->[0]).'; $ref=""';
77             }
78              
79             sub command_unbless {
80 1     1 0 6 my ($self, $cd, $args) = @_;
81              
82 1         3 return join(
83             "",
84             'my $reftype = Scalar::Util::reftype({{var}}); ',
85             '{{var}} = $reftype eq "HASH" ? {%{ {{var}} }} :',
86             ' $reftype eq "ARRAY" ? [@{ {{var}} }] :',
87             ' $reftype eq "SCALAR" ? \(my $copy = ${ {{var}} }) :',
88             ' $reftype eq "CODE" ? sub { goto &{ {{var}} } } :',
89             '(die "Cannot unbless object with type $ref")',
90             );
91             }
92              
93             sub command_clone {
94 2     2 0 5 my ($self, $cd, $args) = @_;
95              
96 2   50     6 my $limit = $args->[0] // 1;
97 2         10 return join(
98             "",
99             "if (++\$ctr_circ <= $limit) { ",
100             "{{var}} = $cd->{clone_func}({{var}}); redo ",
101             "} else { ",
102             "{{var}} = 'CIRCULAR'; \$ref = '' }",
103             );
104             }
105              
106             sub command_unbless_ffc_inlined {
107 1     1 0 3 my ($self, $cd, $args) = @_;
108              
109             # code taken from Function::Fallback::CoreOrPP 0.07
110 1   50     10 $cd->{subs}{unbless} //= <<'EOC';
111             my $ref = shift;
112              
113             my $r = ref($ref);
114             # not a reference
115             return $ref unless $r;
116              
117             # return if not a blessed ref
118             my ($r2, $r3) = "$ref" =~ /(.+)=(.+?)\(/
119             or return $ref;
120              
121             if ($r3 eq 'HASH') {
122             return { %$ref };
123             } elsif ($r3 eq 'ARRAY') {
124             return [ @$ref ];
125             } elsif ($r3 eq 'SCALAR') {
126             return \( my $copy = ${$ref} );
127             } else {
128             die "Can't handle $ref";
129             }
130             EOC
131              
132 1         3 "{{var}} = \$sub_unbless->({{var}}); \$ref = ref({{var}})";
133             }
134              
135             # test
136             sub command_die {
137 0     0 0 0 my ($self, $cd, $args) = @_;
138 0         0 return "die";
139             }
140              
141             sub _generate_cleanser_code {
142 11     11   20 my $self = shift;
143 11         21 my $opts = $self->{_opts};
144              
145             # compilation data, a structure that will be passed around between routines
146             # during the generation of cleanser code.
147             my $cd = {
148             modules => {}, # key = module name, val = version
149 11         42 clone_func => $self->{_opts}{'!clone_func'},
150             code => '',
151             subs => {},
152             };
153              
154 11   50     60 $cd->{modules}{'Scalar::Util'} //= 0;
155 11 50 0     28 $cd->{modules}{'Data::Dmp'} //= 0 if $opts->{'!debug'};
156              
157 11 50       27 if (!$cd->{clone_func}) {
158 11         21 $cd->{clone_func} = 'Clone::PP::clone';
159             }
160             {
161 11 50       14 last unless $cd->{clone_func} =~ /(.+)::(.+)/;
  11         74  
162 11   50     49 $cd->{modules}{$1} //= 0;
163             }
164              
165 11         40 my (@code, @stmts_ary, @stmts_hash, @stmts_main);
166              
167 11         16 my $n = 0;
168             my $add_stmt = sub {
169 30     30   43 my $which = shift;
170 30 100 100     99 if ($which eq 'if' || $which eq 'new_if') {
171 28         47 my ($cond0, $act0) = @_;
172 28         111 for ([\@stmts_ary, '$e', 'ary'],
173             [\@stmts_hash, '$h->{$k}', 'hash'],
174             [\@stmts_main, '$_', 'main']) {
175 84         119 my $act = $act0 ; $act =~ s/\Q{{var}}\E/$_->[1]/g;
  84         316  
176 84         140 my $cond = $cond0; $cond =~ s/\Q{{var}}\E/$_->[1]/g;
  84         146  
177 84 0       154 if ($opts->{'!debug'}) { unless (@{ $_->[0] }) { push @{ $_->[0] }, ' print "DEBUG:'.$_->[2].' cleaner: val=", Data::Dmp::dmp_ellipsis('.$_->[1].'), ", ref=$ref\n"; '."\n" } }
  0 50       0  
  0         0  
  0         0  
  0         0  
178 84 100 100     99 push @{ $_->[0] }, " ".($n && $which ne 'new_if' ? "els":"")."if ($cond) { $act }\n";
  84         452  
179             }
180 28         86 $n++;
181             } else {
182 2         4 my ($stmt0) = @_;
183 2         6 for ([\@stmts_ary, '$e', 'ary'],
184             [\@stmts_hash, '$h->{$k}', 'hash'],
185             [\@stmts_main, '$_', 'main']) {
186 6         9 my $stmt = $stmt0; $stmt =~ s/\Q{{var}}\E/$_->[1]/g;
  6         21  
187 6         7 push @{ $_->[0] }, " $stmt;\n";
  6         20  
188             }
189             }
190 11         73 };
191             my $add_if = sub {
192 16     16   44 $add_stmt->('if', @_);
193 11         33 };
194             my $add_new_if = sub {
195 12     12   25 $add_stmt->('new_if', @_);
196 11         25 };
197             my $add_if_ref = sub {
198 10     10   20 my ($ref, $act0) = @_;
199 10         25 $add_if->("\$ref eq '$ref'", $act0);
200 11         49 };
201             my $add_new_if_ref = sub {
202 7     7   15 my ($ref, $act0) = @_;
203 7         18 $add_new_if->("\$ref eq '$ref'", $act0);
204 11         31 };
205              
206             # catch circular references
207 11         19 my $circ = $opts->{-circular};
208 11 100       28 if ($circ) {
209 3         8 my $meth = "command_$circ->[0]";
210 3 50       15 die "Can't handle command $circ->[0] for option '-circular'" unless $self->can($meth);
211 3         8 my @args = @$circ; shift @args;
  3         6  
212 3         10 my $act = $self->$meth($cd, \@args);
213 3 50       27 if ($opts->{'!debug'}) { $add_stmt->('stmt', 'print "DEBUG: main cleaner: ref=$ref, " . {{var}} . "\n"'); }
  0         0  
214 3         9 $add_new_if->('$ref && $refs{ {{var}} }++', $act);
215             }
216              
217             # catch object of specified classes (e.g. DateTime, etc)
218 11         39 for my $on (grep {/\A\w*(::\w+)*\z/} sort keys %$opts) {
  14         78  
219 3         7 my $o = $opts->{$on};
220 3 50       8 next unless $o;
221 3         8 my $meth = "command_$o->[0]";
222 3 50       29 die "Can't handle command $o->[0] for option '$on'" unless $self->can($meth);
223 3         13 my @args = @$o; shift @args;
  3         4  
224 3         8 my $act = $self->$meth($cd, \@args);
225 3         17 $add_if_ref->($on, $act);
226             }
227              
228             # catch general object not caught by previous
229 11         30 for my $p ([-obj => 'Scalar::Util::blessed({{var}})']) {
230 11         23 my $o = $opts->{$p->[0]};
231 11 100       28 next unless $o;
232 6         13 my $meth = "command_$o->[0]";
233 6 50       34 die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
234 6         16 my @args = @$o; shift @args;
  6         11  
235 6         28 $add_if->($p->[1], $self->$meth($cd, \@args));
236             }
237              
238             # recurse array and hash
239 9 100       27 if ($opts->{'!recurse_obj'}) {
240 2         12 $add_stmt->('stmt', 'my $reftype=Scalar::Util::reftype({{var}})//""');
241 2         7 $add_new_if->('$reftype eq "ARRAY"', '$process_array->({{var}})');
242 2         3 $add_if->('$reftype eq "HASH"' , '$process_hash->({{var}})');
243             } else {
244 7         17 $add_new_if_ref->("ARRAY", '$process_array->({{var}})');
245 7         25 $add_if_ref->("HASH" , '$process_hash->({{var}})');
246             }
247              
248             # lastly, catch any reference left
249 9         20 for my $p ([-ref => '$ref']) {
250 9         20 my $o = $opts->{$p->[0]};
251 9 50       22 next unless $o;
252 0         0 my $meth = "command_$o->[0]";
253 0 0       0 die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
254 0         0 my @args = @$o; shift @args;
  0         0  
255 0         0 $add_if->($p->[1], $self->$meth($cd, \@args));
256             }
257              
258 9         19 push @code, 'sub {'."\n";
259              
260 9         47 for (sort keys %{$cd->{subs}}) {
  9         30  
261 1         19 push @code, "state \$sub_$_ = sub { ".$cd->{subs}{$_}." };\n";
262             }
263              
264 9         22 push @code, 'my $data = shift;'."\n";
265 9 100       21 push @code, 'state %refs;'."\n" if $circ;
266 9 100       16 push @code, 'state $ctr_circ;'."\n" if $circ;
267 9         14 push @code, 'state $process_array;'."\n";
268 9         16 push @code, 'state $process_hash;'."\n";
269 9         33 push @code, (
270             'if (!$process_array) { $process_array = sub { my $a = shift; for my $e (@$a) { ',
271             'my $ref=ref($e);'."\n",
272             join("", @stmts_ary).'} } }'."\n"
273             );
274 9         30 push @code, (
275             'if (!$process_hash) { $process_hash = sub { my $h = shift; for my $k (keys %$h) { ',
276             'my $ref=ref($h->{$k});'."\n",
277             join("", @stmts_hash).'} } }'."\n"
278             );
279 9 100       19 push @code, '%refs = (); $ctr_circ=0;'."\n" if $circ;
280 9         42 push @code, (
281             'for ($data) { ',
282             'my $ref=ref($_);'."\n",
283             join("", @stmts_main).'}'."\n"
284             );
285 9 50       30 push @code, 'print "DEBUG: main cleaner: result: ", Data::Dmp::dmp_ellipsis($data), "\n";'."\n" if $opts->{'!debug'};
286 9         19 push @code, '$data'."\n";
287 9         14 push @code, '}'."\n";
288              
289 9         75 my $code = join("", @code).";";
290              
291 9 50 33     32 if ($ENV{LOG_CLEANSER_CODE} && log_is_trace()) {
292 0         0 require String::LineNumber;
293             log_trace("Cleanser code:\n%s",
294 0 0 0     0 $ENV{LINENUM} // 1 ?
295             String::LineNumber::linenum($code) : $code);
296             }
297              
298 9         17 $cd->{src} = $code;
299              
300 9         151 $cd;
301             }
302              
303             sub clean_in_place {
304 11     11 1 1436 my ($self, $data) = @_;
305              
306 11         242 $self->{_code}->($data);
307             }
308              
309             sub clone_and_clean {
310 1     1   2686 no strict 'refs';
  1         3  
  1         109  
311              
312 4     4 1 33 my ($self, $data) = @_;
313 4         5 my $clone = &{$self->{_cd}{clone_func}}($data);
  4         21  
314 4         448 $self->clean_in_place($clone);
315             }
316              
317             1;
318             # ABSTRACT: Clean data structure
319              
320             __END__
321              
322             =pod
323              
324             =encoding UTF-8
325              
326             =head1 NAME
327              
328             Data::Clean - Clean data structure
329              
330             =head1 VERSION
331              
332             This document describes version 0.507 of Data::Clean (from Perl distribution Data-Clean), released on 2020-04-07.
333              
334             =head1 SYNOPSIS
335              
336             use Data::Clean;
337              
338             my $cleanser = Data::Clean->new(
339             # specify how to deal with specific classes
340             'DateTime' => [call_method => 'epoch'], # replace object with its epoch
341             'Time::Moment' => [call_method => 'epoch'], # replace object with its epoch
342             'Regexp' => ['stringify'], # replace $obj with "$obj"
343              
344             # specify how to deal with all scalar refs
345             SCALAR => ['deref_scalar'], # replace \1 with 1
346              
347             # specify how to deal with circular reference
348             -circular => ['clone'],
349              
350             # specify how to deal with all other kinds of objects
351             -obj => ['unbless'],
352              
353             # recurse into object
354             #'!recurse_obj'=> 1,
355              
356             # generate cleaner with debugging messages
357             #'!debug' => 1,
358             );
359              
360             # to get cleansed data
361             my $cleansed_data = $cleanser->clone_and_clean($data);
362              
363             # to replace original data with cleansed one
364             $cleanser->clean_in_place($data);
365              
366             =head1 DESCRIPTION
367              
368             This class can be used to process a data structure by replacing some forms of
369             data items with other forms. One of the main uses is to clean "unsafe" data,
370             e.g. clean a data structure so it can be encoded to JSON (see
371             L<Data::Clean::ForJSON>, which is a thin wrapper over this class).
372              
373             As can be seen from the example, you specify a list of transformations to be
374             done, and then this class will generate an appropriate Perl code to do the
375             cleansing. This class is faster than the other ways of processing, e.g.
376             L<Data::Rmap> (see L<Bencher::Scenarios::DataCleansing> for some benchmarks).
377              
378             =for Pod::Coverage ^(command_.+)$
379              
380             =head1 METHODS
381              
382             =head2 new(%opts) => $obj
383              
384             Create a new instance.
385              
386             Options specify what to do with certain category of data. Option keys are either
387             reference types (like C<HASH>, C<ARRAY>, C<SCALAR>) or class names (like
388             C<Foo::Bar>), or C<-obj> (to match all kinds of objects, a.k.a. blessed
389             references), C<-circular> (to match circular references), C<-ref> (to refer to
390             any kind of references, used to process references not handled by other
391             options). Option values are arrayrefs, the first element of the array is command
392             name, to specify what to do with the reference/class. The rest are command
393             arguments.
394              
395             Note that arrayrefs and hashrefs are always walked into, so it's not trapped by
396             C<-ref>.
397              
398             Default for C<%opts>: C<< -ref => 'stringify' >>.
399              
400             Option keys that start with C<!> are special:
401              
402             =over
403              
404             =item * !recurse_obj (bool)
405              
406             Can be set to true to to recurse into objects if they are hash- or array-based.
407             By default objects are not recursed into. Note that if you enable this option,
408             object options (like C<Foo::Bar> or C<-obj>) won't work for hash- and
409             array-based objects because they will be recursed instead.
410              
411             =item * !clone_func (str)
412              
413             Set fully qualified name of clone function to use. The default is to use
414             C<Clone::PP::clone>.
415              
416             The clone module (all but the last part of the C<!clone_func> value) will
417             automatically be loaded using C<require()>.
418              
419             =item * !debug (bool)
420              
421             If set to true, will generate code to print debugging messages. For debugging
422             only.
423              
424             =back
425              
426             Available commands:
427              
428             =over 4
429              
430             =item * ['stringify']
431              
432             This will stringify a reference like C<{}> to something like C<HASH(0x135f998)>.
433              
434             =item * ['replace_with_ref']
435              
436             This will replace a reference like C<{}> with C<HASH>.
437              
438             =item * ['replace_with_str', STR]
439              
440             This will replace a reference like C<{}> with I<STR>.
441              
442             =item * ['call_method' => STR]
443              
444             This will call a method named I<STR> and use its return as the replacement. For
445             example: C<< DateTime->from_epoch(epoch=>1000) >> when processed with C<<
446             [call_method => 'epoch'] >> will become 1000.
447              
448             =item * ['call_func', STR]
449              
450             This will call a function named I<STR> with value as argument and use its return
451             as the replacement.
452              
453             =item * ['one_or_zero']
454              
455             This will perform C<< $val ? 1:0 >>.
456              
457             =item * ['deref_scalar_one_or_zero']
458              
459             This will perform C<< ${$val} ? 1:0 >>.
460              
461             =item * ['deref_scalar']
462              
463             This will replace a scalar reference like \1 with 1.
464              
465             =item * ['unbless']
466              
467             This will perform unblessing using L<Function::Fallback::CoreOrPP::unbless()>.
468             Should be done only for objects (C<-obj>).
469              
470             =item * ['die']
471              
472             Die. Only for testing.
473              
474             =item * ['code', STR]
475              
476             This will replace with I<STR> treated as Perl code.
477              
478             =item * ['clone', INT]
479              
480             This command is useful if you have circular references and want to expand/copy
481             them. For example:
482              
483             my $def_opts = { opt1 => 'default', opt2 => 0 };
484             my $users = { alice => $def_opts, bob => $def_opts, charlie => $def_opts };
485              
486             C<$users> contains three references to the same data structure. With the default
487             behaviour of C<< -circular => [replace_with_str => 'CIRCULAR'] >> the cleaned
488             data structure will be:
489              
490             { alice => { opt1 => 'default', opt2 => 0 },
491             bob => 'CIRCULAR',
492             charlie => 'CIRCULAR' }
493              
494             But with C<< -circular => ['clone'] >> option, the data structure will be
495             cleaned to become (the C<$def_opts> is cloned):
496              
497             { alice => { opt1 => 'default', opt2 => 0 },
498             bob => { opt1 => 'default', opt2 => 0 },
499             charlie => { opt1 => 'default', opt2 => 0 }, }
500              
501             The command argument specifies the number of references to clone as a limit (the
502             default is 50), since a cyclical structure can lead to infinite cloning. Above
503             this limit, the circular references will be replaced with a string
504             C<"CIRCULAR">. For example:
505              
506             my $a = [1]; push @$a, $a;
507              
508             With C<< -circular => ['clone', 2] >> the data will be cleaned as:
509              
510             [1, [1, [1, "CIRCULAR"]]]
511              
512             With C<< -circular => ['clone', 3] >> the data will be cleaned as:
513              
514             [1, [1, [1, [1, "CIRCULAR"]]]]
515              
516             =back
517              
518             =head2 $obj->clean_in_place($data) => $cleaned
519              
520             Clean $data. Modify data in-place.
521              
522             =head2 $obj->clone_and_clean($data) => $cleaned
523              
524             Clean $data. Clone $data first.
525              
526             =head1 ENVIRONMENT
527              
528             =over
529              
530             =item * LOG_CLEANSER_CODE => BOOL (default: 0)
531              
532             Can be enabled if you want to see the generated cleanser code. It is logged at
533             level C<trace> using L<Log::ger>.
534              
535             =item * LINENUM => BOOL (default: 1)
536              
537             When logging cleanser code, whether to give line numbers.
538              
539             =back
540              
541             =head1 HOMEPAGE
542              
543             Please visit the project's homepage at L<https://metacpan.org/release/Data-Clean>.
544              
545             =head1 SOURCE
546              
547             Source repository is at L<https://github.com/perlancar/perl-Data-Clean>.
548              
549             =head1 BUGS
550              
551             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Clean>
552              
553             When submitting a bug or request, please include a test-file or a
554             patch to an existing test-file that illustrates the bug or desired
555             feature.
556              
557             =head1 SEE ALSO
558              
559             Related modules: L<Data::Rmap>, L<Hash::Sanitize>, L<Data::Walk>.
560              
561             =head1 AUTHOR
562              
563             perlancar <perlancar@cpan.org>
564              
565             =head1 COPYRIGHT AND LICENSE
566              
567             This software is copyright (c) 2020, 2019, 2018, 2017, 2016 by perlancar@cpan.org.
568              
569             This is free software; you can redistribute it and/or modify it under
570             the same terms as the Perl 5 programming language system itself.
571              
572             =cut