File Coverage

blib/lib/Data/Clean.pm
Criterion Covered Total %
statement 153 176 86.9
branch 33 54 61.1
condition 12 23 52.1
subroutine 21 26 80.7
pod 3 15 20.0
total 222 294 75.5


line stmt bran cond sub pod time code
1              
2             use 5.010001;
3 1     1   1036 use strict;
  1         12  
4 1     1   5 use warnings;
  1         1  
  1         30  
5 1     1   5 use Log::ger;
  1         1  
  1         21  
6 1     1   1637  
  1         51  
  1         5  
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2022-08-28'; # DATE
9             our $DIST = 'Data-Clean'; # DIST
10             our $VERSION = '0.508'; # VERSION
11              
12             my ($class, %opts) = @_;
13             my $self = bless {_opts=>\%opts}, $class;
14 11     11 1 551764 log_trace("Cleanser options: %s", \%opts);
15 11         37  
16 11         32 my $cd = $self->_generate_cleanser_code;
17             for my $mod (keys %{ $cd->{modules} }) {
18 11         34 (my $mod_pm = "$mod.pm") =~ s!::!/!g;
19 9         16 require $mod_pm;
  9         24  
20 18         742 }
21 18         1208 $self->{_cd} = $cd;
22             $self->{_code} = eval $cd->{src}; ## no critic: BuiltinFunctions::ProhibitStringyEval
23 9         21 {
24 9         3553 last unless $cd->{clone_func} =~ /(.+)::(.+)/;
25             (my $mod_pm = "$1.pm") =~ s!::!/!g;
26 9 50       20 require $mod_pm;
  9         41  
27 9         38 }
28 9         38 die "Can't generate code: $@" if $@;
29              
30 9 50       19 $self;
31             }
32 9         33  
33             my ($self, $cd, $args) = @_;
34             my $mn = $args->[0];
35             die "Invalid method name syntax" unless $mn =~ /\A\w+\z/;
36 1     1 0 5 return "{{var}} = {{var}}->$mn; \$ref = ref({{var}})";
37 1         2 }
38 1 50       23  
39 0         0 my ($self, $cd, $args) = @_;
40             my $fn = $args->[0];
41             die "Invalid func name syntax" unless $fn =~ /\A\w+(::\w+)*\z/;
42             return "{{var}} = $fn({{var}}); \$ref = ref({{var}})";
43 2     2 0 4 }
44 2         3  
45 2 100       27 my ($self, $cd, $args) = @_;
46 1         5 return "{{var}} = {{var}} ? 1:0; \$ref = ''";
47             }
48              
49             my ($self, $cd, $args) = @_;
50 0     0 0 0 return "{{var}} = \${ {{var}} } ? 1:0; \$ref = ''";
51 0         0 }
52              
53             my ($self, $cd, $args) = @_;
54             return '{{var}} = ${ {{var}} }; $ref = ref({{var}})';
55 0     0 0 0 }
56 0         0  
57             my ($self, $cd, $args) = @_;
58             return '{{var}} = "{{var}}"; $ref = ""';
59             }
60 0     0 0 0  
61 0         0 my ($self, $cd, $args) = @_;
62             return '{{var}} = $ref; $ref = ""';
63             }
64              
65 0     0 0 0 require String::PerlQuote;
66 0         0  
67             my ($self, $cd, $args) = @_;
68             return "{{var}} = ".String::PerlQuote::double_quote($args->[0]).'; $ref=""';
69             }
70 2     2 0 5  
71 2         3 my ($self, $cd, $args) = @_;
72              
73             return join(
74             "",
75 3     3 0 474 'my $reftype = Scalar::Util::reftype({{var}}); ',
76             '{{var}} = $reftype eq "HASH" ? {%{ {{var}} }} :',
77 3         580 ' $reftype eq "ARRAY" ? [@{ {{var}} }] :',
78 3         8 ' $reftype eq "SCALAR" ? \(my $copy = ${ {{var}} }) :',
79             ' $reftype eq "CODE" ? sub { goto &{ {{var}} } } :',
80             '(die "Cannot unbless object with type $ref")',
81             );
82 1     1 0 3 }
83              
84 1         3 my ($self, $cd, $args) = @_;
85              
86             my $limit = $args->[0] // 1;
87             return join(
88             "",
89             "if (++\$ctr_circ <= $limit) { ",
90             "{{var}} = $cd->{clone_func}({{var}}); redo ",
91             "} else { ",
92             "{{var}} = 'CIRCULAR'; \$ref = '' }",
93             );
94             }
95              
96 2     2 0 5 my ($self, $cd, $args) = @_;
97              
98 2   50     5 # code taken from Function::Fallback::CoreOrPP 0.07
99 2         10 $cd->{subs}{unbless} //= <<'EOC';
100             my $ref = shift;
101              
102             my $r = ref($ref);
103             # not a reference
104             return $ref unless $r;
105              
106             # return if not a blessed ref
107             my ($r2, $r3) = "$ref" =~ /(.+)=(.+?)\(/
108             or return $ref;
109 1     1 0 3  
110             if ($r3 eq 'HASH') {
111             return { %$ref };
112 1   50     6 } elsif ($r3 eq 'ARRAY') {
113             return [ @$ref ];
114             } elsif ($r3 eq 'SCALAR') {
115             return \( my $copy = ${$ref} );
116             } else {
117             die "Can't handle $ref";
118             }
119             EOC
120              
121             "{{var}} = \$sub_unbless->({{var}}); \$ref = ref({{var}})";
122             }
123              
124             # test
125             my ($self, $cd, $args) = @_;
126             return "die";
127             }
128              
129             my $self = shift;
130             my $opts = $self->{_opts};
131              
132             # compilation data, a structure that will be passed around between routines
133             # during the generation of cleanser code.
134 1         2 my $cd = {
135             modules => {}, # key = module name, val = version
136             clone_func => $self->{_opts}{'!clone_func'},
137             code => '',
138             subs => {},
139 0     0 0 0 };
140 0         0  
141             $cd->{modules}{'Scalar::Util'} //= 0;
142             $cd->{modules}{'Data::Dmp'} //= 0 if $opts->{'!debug'};
143              
144 11     11   14 if (!$cd->{clone_func}) {
145 11         17 $cd->{clone_func} = $ENV{PERL_DATA_CLEAN_CLONE_FUNC} //
146             'Clone::PP::clone';
147             }
148             {
149             last unless $cd->{clone_func} =~ /(.+)::(.+)/;
150             $cd->{modules}{$1} //= 0;
151 11         34 }
152              
153             my (@code, @stmts_ary, @stmts_hash, @stmts_main);
154              
155             my $n = 0;
156 11   50     47 my $add_stmt = sub {
157 11 50 0     23 my $which = shift;
158             if ($which eq 'if' || $which eq 'new_if') {
159 11 50       20 my ($cond0, $act0) = @_;
160             for ([\@stmts_ary, '$e', 'ary'],
161 11   50     28 [\@stmts_hash, '$h->{$k}', 'hash'],
162             [\@stmts_main, '$_', 'main']) {
163             my $act = $act0 ; $act =~ s/\Q{{var}}\E/$_->[1]/g;
164 11 50       13 my $cond = $cond0; $cond =~ s/\Q{{var}}\E/$_->[1]/g;
  11         81  
165 11   50     42 if ($opts->{'!debug'}) { unless (@{ $_->[0] }) { push @{ $_->[0] }, ' print "DEBUG:'.$_->[2].' cleaner: val=", Data::Dmp::dmp_ellipsis('.$_->[1].'), ", ref=$ref\n"; '."\n" } }
166             push @{ $_->[0] }, " ".($n && $which ne 'new_if' ? "els":"")."if ($cond) { $act }\n";
167             }
168 11         15 $n++;
169             } else {
170 11         13 my ($stmt0) = @_;
171             for ([\@stmts_ary, '$e', 'ary'],
172 30     30   36 [\@stmts_hash, '$h->{$k}', 'hash'],
173 30 100 100     71 [\@stmts_main, '$_', 'main']) {
174 28         43 my $stmt = $stmt0; $stmt =~ s/\Q{{var}}\E/$_->[1]/g;
175 28         67 push @{ $_->[0] }, " $stmt;\n";
176             }
177             }
178 84         110 };
  84         259  
179 84         111 my $add_if = sub {
  84         118  
180 84 0       132 $add_stmt->('if', @_);
  0 50       0  
  0         0  
  0         0  
  0         0  
181 84 100 100     118 };
  84         324  
182             my $add_new_if = sub {
183 28         98 $add_stmt->('new_if', @_);
184             };
185 2         3 my $add_if_ref = sub {
186 2         6 my ($ref, $act0) = @_;
187             $add_if->("\$ref eq '$ref'", $act0);
188             };
189 6         9 my $add_new_if_ref = sub {
  6         15  
190 6         7 my ($ref, $act0) = @_;
  6         13  
191             $add_new_if->("\$ref eq '$ref'", $act0);
192             };
193 11         46  
194             # catch circular references
195 16     16   37 my $circ = $opts->{-circular};
196 11         28 if ($circ) {
197             my $meth = "command_$circ->[0]";
198 12     12   20 die "Can't handle command $circ->[0] for option '-circular'" unless $self->can($meth);
199 11         19 my @args = @$circ; shift @args;
200             my $act = $self->$meth($cd, \@args);
201 10     10   16 if ($opts->{'!debug'}) { $add_stmt->('stmt', 'print "DEBUG: main cleaner: ref=$ref, " . {{var}} . "\n"'); }
202 10         22 $add_new_if->('$ref && $refs{ {{var}} }++', $act);
203 11         21 }
204              
205 7     7   9 # catch object of specified classes (e.g. DateTime, etc)
206 7         18 for my $on (grep {/\A\w*(::\w+)*\z/} sort keys %$opts) {
207 11         23 my $o = $opts->{$on};
208             next unless $o;
209             my $meth = "command_$o->[0]";
210 11         15 die "Can't handle command $o->[0] for option '$on'" unless $self->can($meth);
211 11 100       20 my @args = @$o; shift @args;
212 3         6 my $act = $self->$meth($cd, \@args);
213 3 50       24 $add_if_ref->($on, $act);
214 3         9 }
  3         4  
215 3         8  
216 3 50       14 # catch general object not caught by previous
  0         0  
217 3         5 for my $p ([-obj => 'Scalar::Util::blessed({{var}})']) {
218             my $o = $opts->{$p->[0]};
219             next unless $o;
220             my $meth = "command_$o->[0]";
221 11         46 die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
  14         81  
222 3         7 my @args = @$o; shift @args;
223 3 50       8 $add_if->($p->[1], $self->$meth($cd, \@args));
224 3         7 }
225 3 50       16  
226 3         7 # recurse array and hash
  3         5  
227 3         7 if ($opts->{'!recurse_obj'}) {
228 3         12 $add_stmt->('stmt', 'my $reftype=Scalar::Util::reftype({{var}})//""');
229             $add_new_if->('$reftype eq "ARRAY"', '$process_array->({{var}})');
230             $add_if->('$reftype eq "HASH"' , '$process_hash->({{var}})');
231             } else {
232 11         24 $add_new_if_ref->("ARRAY", '$process_array->({{var}})');
233 11         18 $add_if_ref->("HASH" , '$process_hash->({{var}})');
234 11 100       22 }
235 6         11  
236 6 50       21 # lastly, catch any reference left
237 6         14 for my $p ([-ref => '$ref']) {
  6         8  
238 6         18 my $o = $opts->{$p->[0]};
239             next unless $o;
240             my $meth = "command_$o->[0]";
241             die "Can't handle command $o->[0] for option '$p->[0]'" unless $self->can($meth);
242 9 100       28 my @args = @$o; shift @args;
243 2         6 $add_if->($p->[1], $self->$meth($cd, \@args));
244 2         6 }
245 2         5  
246             push @code, 'sub {'."\n";
247 7         15  
248 7         13 for (sort keys %{$cd->{subs}}) {
249             push @code, "state \$sub_$_ = sub { ".$cd->{subs}{$_}." };\n";
250             }
251              
252 9         15 push @code, 'my $data = shift;'."\n";
253 9         15 push @code, 'state %refs;'."\n" if $circ;
254 9 50       18 push @code, 'state $ctr_circ;'."\n" if $circ;
255 0         0 push @code, 'state $process_array;'."\n";
256 0 0       0 push @code, 'state $process_hash;'."\n";
257 0         0 push @code, (
  0         0  
258 0         0 'if (!$process_array) { $process_array = sub { my $a = shift; for my $e (@$a) { ',
259             'my $ref=ref($e);'."\n",
260             join("", @stmts_ary).'} } }'."\n"
261 9         13 );
262             push @code, (
263 9         9 'if (!$process_hash) { $process_hash = sub { my $h = shift; for my $k (keys %$h) { ',
  9         24  
264 1         18 'my $ref=ref($h->{$k});'."\n",
265             join("", @stmts_hash).'} } }'."\n"
266             );
267 9         16 push @code, '%refs = (); $ctr_circ=0;'."\n" if $circ;
268 9 100       17 push @code, (
269 9 100       14 'for ($data) { ',
270 9         15 'my $ref=ref($_);'."\n",
271 9         13 join("", @stmts_main).'}'."\n"
272 9         37 );
273             push @code, 'print "DEBUG: main cleaner: result: ", Data::Dmp::dmp_ellipsis($data), "\n";'."\n" if $opts->{'!debug'};
274             push @code, '$data'."\n";
275             push @code, '}'."\n";
276              
277 9         62 my $code = join("", @code).";";
278              
279             if ($ENV{LOG_CLEANSER_CODE} && log_is_trace()) {
280             require String::LineNumber;
281             log_trace("Cleanser code:\n%s",
282 9 100       15 $ENV{LINENUM} // 1 ?
283 9         22 String::LineNumber::linenum($code) : $code);
284             }
285              
286             $cd->{src} = $code;
287              
288 9 50       17 $cd;
289 9         13 }
290 9         12  
291             my ($self, $data) = @_;
292 9         63  
293             $self->{_code}->($data);
294 9 50 33     22 }
295 0         0  
296             no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict
297 0 0 0     0  
298             my ($self, $data) = @_;
299             my $clone = &{$self->{_cd}{clone_func}}($data);
300             $self->clean_in_place($clone);
301 9         15 }
302              
303 9         152 1;
304             # ABSTRACT: Clean data structure
305              
306              
307 11     11 1 1132 =pod
308              
309 11         198 =encoding UTF-8
310              
311             =head1 NAME
312              
313 1     1   2320 Data::Clean - Clean data structure
  1         2  
  1         83  
314              
315 4     4 1 20 =head1 VERSION
316 4         7  
  4         16  
317 4         367 This document describes version 0.508 of Data::Clean (from Perl distribution Data-Clean), released on 2022-08-28.
318              
319             =head1 SYNOPSIS
320              
321             use Data::Clean;
322              
323             my $cleanser = Data::Clean->new(
324             # specify how to deal with specific classes
325             'DateTime' => [call_method => 'epoch'], # replace object with its epoch
326             'Time::Moment' => [call_method => 'epoch'], # replace object with its epoch
327             'Regexp' => ['stringify'], # replace $obj with "$obj"
328              
329             # specify how to deal with all scalar refs
330             SCALAR => ['deref_scalar'], # replace \1 with 1
331              
332             # specify how to deal with circular reference
333             -circular => ['clone'],
334              
335             # specify how to deal with all other kinds of objects
336             -obj => ['unbless'],
337              
338             # recurse into object
339             #'!recurse_obj'=> 1,
340              
341             # generate cleaner with debugging messages
342             #'!debug' => 1,
343             );
344              
345             # to get cleansed data
346             my $cleansed_data = $cleanser->clone_and_clean($data);
347              
348             # to replace original data with cleansed one
349             $cleanser->clean_in_place($data);
350              
351             =head1 DESCRIPTION
352              
353             This class can be used to process a data structure by replacing some forms of
354             data items with other forms. One of the main uses is to clean "unsafe" data,
355             e.g. clean a data structure so it can be encoded to JSON (see
356             L<Data::Clean::ForJSON>, which is a thin wrapper over this class).
357              
358             As can be seen from the example, you specify a list of transformations to be
359             done, and then this class will generate an appropriate Perl code to do the
360             cleansing. This class is faster than the other ways of processing, e.g.
361             L<Data::Rmap> (see L<Bencher::Scenarios::DataCleansing> for some benchmarks).
362              
363             =for Pod::Coverage ^(command_.+)$
364              
365             =head1 METHODS
366              
367             =head2 new(%opts) => $obj
368              
369             Create a new instance.
370              
371             Options specify what to do with certain category of data. Option keys are either
372             reference types (like C<HASH>, C<ARRAY>, C<SCALAR>) or class names (like
373             C<Foo::Bar>), or C<-obj> (to match all kinds of objects, a.k.a. blessed
374             references), C<-circular> (to match circular references), C<-ref> (to refer to
375             any kind of references, used to process references not handled by other
376             options). Option values are arrayrefs, the first element of the array is command
377             name, to specify what to do with the reference/class. The rest are command
378             arguments.
379              
380             Note that arrayrefs and hashrefs are always walked into, so it's not trapped by
381             C<-ref>.
382              
383             Default for C<%opts>: C<< -ref => 'stringify' >>.
384              
385             Option keys that start with C<!> are special:
386              
387             =over
388              
389             =item * !recurse_obj (bool)
390              
391             Can be set to true to to recurse into objects if they are hash- or array-based.
392             By default objects are not recursed into. Note that if you enable this option,
393             object options (like C<Foo::Bar> or C<-obj>) won't work for hash- and
394             array-based objects because they will be recursed instead.
395              
396             =item * !clone_func (str)
397              
398             Set fully qualified name of clone function to use. The default is to get the
399             value of the environment C<PERL_DATA_CLEAN_CLONE_FUNC> or use the default
400             C<Clone::PP::clone>.
401              
402             The clone module (all but the last part of the C<!clone_func> value) will
403             automatically be loaded using C<require()>.
404              
405             =item * !debug (bool)
406              
407             If set to true, will generate code to print debugging messages. For debugging
408             only.
409              
410             =back
411              
412             Available commands:
413              
414             =over 4
415              
416             =item * ['stringify']
417              
418             This will stringify a reference like C<{}> to something like C<HASH(0x135f998)>.
419              
420             =item * ['replace_with_ref']
421              
422             This will replace a reference like C<{}> with C<HASH>.
423              
424             =item * ['replace_with_str', STR]
425              
426             This will replace a reference like C<{}> with I<STR>.
427              
428             =item * ['call_method' => STR]
429              
430             This will call a method named I<STR> and use its return as the replacement. For
431             example: C<< DateTime->from_epoch(epoch=>1000) >> when processed with C<<
432             [call_method => 'epoch'] >> will become 1000.
433              
434             =item * ['call_func', STR]
435              
436             This will call a function named I<STR> with value as argument and use its return
437             as the replacement.
438              
439             =item * ['one_or_zero']
440              
441             This will perform C<< $val ? 1:0 >>.
442              
443             =item * ['deref_scalar_one_or_zero']
444              
445             This will perform C<< ${$val} ? 1:0 >>.
446              
447             =item * ['deref_scalar']
448              
449             This will replace a scalar reference like \1 with 1.
450              
451             =item * ['unbless']
452              
453             This will perform unblessing using L<Function::Fallback::CoreOrPP::unbless()>.
454             Should be done only for objects (C<-obj>).
455              
456             =item * ['die']
457              
458             Die. Only for testing.
459              
460             =item * ['code', STR]
461              
462             This will replace with I<STR> treated as Perl code.
463              
464             =item * ['clone', INT]
465              
466             This command is useful if you have circular references and want to expand/copy
467             them. For example:
468              
469             my $def_opts = { opt1 => 'default', opt2 => 0 };
470             my $users = { alice => $def_opts, bob => $def_opts, charlie => $def_opts };
471              
472             C<$users> contains three references to the same data structure. With the default
473             behaviour of C<< -circular => [replace_with_str => 'CIRCULAR'] >> the cleaned
474             data structure will be:
475              
476             { alice => { opt1 => 'default', opt2 => 0 },
477             bob => 'CIRCULAR',
478             charlie => 'CIRCULAR' }
479              
480             But with C<< -circular => ['clone'] >> option, the data structure will be
481             cleaned to become (the C<$def_opts> is cloned):
482              
483             { alice => { opt1 => 'default', opt2 => 0 },
484             bob => { opt1 => 'default', opt2 => 0 },
485             charlie => { opt1 => 'default', opt2 => 0 }, }
486              
487             The command argument specifies the number of references to clone as a limit (the
488             default is 50), since a cyclical structure can lead to infinite cloning. Above
489             this limit, the circular references will be replaced with a string
490             C<"CIRCULAR">. For example:
491              
492             my $a = [1]; push @$a, $a;
493              
494             With C<< -circular => ['clone', 2] >> the data will be cleaned as:
495              
496             [1, [1, [1, "CIRCULAR"]]]
497              
498             With C<< -circular => ['clone', 3] >> the data will be cleaned as:
499              
500             [1, [1, [1, [1, "CIRCULAR"]]]]
501              
502             =back
503              
504             =head2 $obj->clean_in_place($data) => $cleaned
505              
506             Clean $data. Modify data in-place.
507              
508             =head2 $obj->clone_and_clean($data) => $cleaned
509              
510             Clean $data. Clone $data first.
511              
512             =head1 ENVIRONMENT
513              
514             =over
515              
516             =item * PERL_DATA_CLEAN_CLONE_FUNC
517              
518             String. Set default for C<!clone_func> option.
519              
520             =item * LOG_CLEANSER_CODE => BOOL (default: 0)
521              
522             Can be enabled if you want to see the generated cleanser code. It is logged at
523             level C<trace> using L<Log::ger>.
524              
525             =item * LINENUM => BOOL (default: 1)
526              
527             When logging cleanser code, whether to give line numbers.
528              
529             =back
530              
531             =head1 HOMEPAGE
532              
533             Please visit the project's homepage at L<https://metacpan.org/release/Data-Clean>.
534              
535             =head1 SOURCE
536              
537             Source repository is at L<https://github.com/perlancar/perl-Data-Clean>.
538              
539             =head1 SEE ALSO
540              
541             Related modules: L<Data::Rmap>, L<Hash::Sanitize>, L<Data::Walk>.
542              
543             =head1 AUTHOR
544              
545             perlancar <perlancar@cpan.org>
546              
547             =head1 CONTRIBUTING
548              
549              
550             To contribute, you can send patches by email/via RT, or send pull requests on
551             GitHub.
552              
553             Most of the time, you don't need to build the distribution yourself. You can
554             simply modify the code, then test via:
555              
556             % prove -l
557              
558             If you want to build the distribution (e.g. to try to install it locally on your
559             system), you can install L<Dist::Zilla>,
560             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
561             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
562             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
563             that are considered a bug and can be reported to me.
564              
565             =head1 COPYRIGHT AND LICENSE
566              
567             This software is copyright (c) 2022, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
568              
569             This is free software; you can redistribute it and/or modify it under
570             the same terms as the Perl 5 programming language system itself.
571              
572             =head1 BUGS
573              
574             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Data-Clean>
575              
576             When submitting a bug or request, please include a test-file or a
577             patch to an existing test-file that illustrates the bug or desired
578             feature.
579              
580             =cut