File Coverage

blib/lib/Fsdb/Filter/dbroweval.pm
Criterion Covered Total %
statement 18 113 15.9
branch 0 42 0.0
condition n/a
subroutine 6 18 33.3
pod 7 7 100.0
total 31 180 17.2


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             #
4             # dbroweval.pm
5             # Copyright (C) 1991-2017 by John Heidemann
6             # $Id: 8c28f7b692255da8ba7895ca3d5c79edcf00f160 $
7             #
8             # This program is distributed under terms of the GNU general
9             # public license, version 2. See the file COPYING
10             # in $dblibdir for details.
11             #
12              
13             package Fsdb::Filter::dbroweval;
14              
15             =head1 NAME
16              
17             dbroweval - evaluate code for each row of a fsdb file
18              
19             =head1 SYNOPSIS
20              
21             dbroweval [-f CodeFile] code [code...]
22              
23             =head1 DESCRIPTION
24              
25             Evaluate code for each row of the data.
26              
27             Typical actions are things like reformatting
28             and other data transformations.
29              
30             Code can include embedded column names preceded by underscores;
31             these result in the value of that column for the current row.
32              
33             The values of the last row's columns are retrieved with _last_foo
34             where foo is the column name.
35              
36             Even more perverse, _columname(N) is the value of the
37             Nth column after columnname [so _columnname(0) is the also
38             the column's value.
39              
40              
41             =head1 OPTIONS
42              
43             =over 4
44              
45             =item B<-b CODE>
46              
47             Run CODE before reading any data (like awk BEGIN blocks).
48              
49             =item B<-e CODE>
50              
51             Run CODE at the end of all data (like awk END blocks).
52              
53             =item B<-f FILE>
54              
55             Read code from the FILE.
56              
57             =item B<-n> or B<--no-output>
58              
59             no output except for comments and what is in the provided code
60              
61             =item B<-N> or B<--no-output-even-comments>
62              
63             no output at all, except for what is in the provided code
64              
65             =item B<-m> or B<--manual-output>
66              
67             The user must setup output,
68             allowing arbitrary comments.
69             See example 2 below for details.
70              
71             =item B<-w> or B<--warnings>
72              
73             Enable warnings in user supplied code.
74              
75             =item B<--saveoutput $OUT_REF>
76              
77             Save output writer (for integration with other fsdb filters).
78              
79             =back
80              
81             =for comment
82             begin_standard_fsdb_options
83              
84             This module also supports the standard fsdb options:
85              
86             =over 4
87              
88             =item B<-d>
89              
90             Enable debugging output.
91              
92             =item B<-i> or B<--input> InputSource
93              
94             Read from InputSource, typically a file name, or C<-> for standard input,
95             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
96              
97             =item B<-o> or B<--output> OutputDestination
98              
99             Write to OutputDestination, typically a file name, or C<-> for standard output,
100             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
101              
102             =item B<--autorun> or B<--noautorun>
103              
104             By default, programs process automatically,
105             but Fsdb::Filter objects in Perl do not run until you invoke
106             the run() method.
107             The C<--(no)autorun> option controls that behavior within Perl.
108              
109             =item B<--header> H
110              
111             Use H as the full Fsdb header, rather than reading a header from
112             then input.
113              
114             =item B<--help>
115              
116             Show help.
117              
118             =item B<--man>
119              
120             Show full manual.
121              
122             =back
123              
124             =for comment
125             end_standard_fsdb_options
126              
127              
128             =head1 ADVANCED USAGE
129              
130             Typically L outputs a line in the same schema for each input line.
131             For advanced usage, one can violate each of these assumptions.
132              
133             Some fun:
134              
135             =over 4
136              
137             =item B
138              
139             Add the code C
140              
141             =item B
142              
143             Call C<&$write_fastpath_sub($fref)>.
144             You may find C<$fref>, the input row, useful.
145              
146             =item B
147              
148             See the examples below in L
149              
150             =back
151              
152              
153              
154             =head1 SAMPLE USAGE
155              
156             =head2 Input:
157              
158             #fsdb size mean stddev pct_rsd
159             1024 1.4962e+06 2.8497e+05 19.047
160             10240 5.0286e+06 6.0103e+05 11.952
161             102400 4.9216e+06 3.0939e+05 6.2863
162             # | dbsetheader size bw
163             # | /home/johnh/BIN/DB/dbmultistats size bw
164             # | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd
165              
166             =head2 Command:
167              
168             cat data.fsdb | dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);'
169              
170             =head2 Output:
171              
172             #fsdb size mean stddev pct_rsd
173             1024 1496200 284970 19.047
174             10240 5028600 601030 11.952
175             102400 4921600 309390 6.2863
176             # | dbsetheader size bw
177             # | /home/johnh/BIN/DB/dbmultistats size bw
178             # | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd
179             # | /home/johnh/BIN/DB/dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); }
180              
181              
182             =head2 Command 2: Changing the Schema
183              
184             By default, dbroweval reads and writes the same format file.
185             The recommended method of adding and removing columns is to do so
186             before or after dbroweval. I.e.,
187              
188             cat data.fsdb |
189             dbcolcreate divisible_by_ten |
190             dbroweval '_divisible_by_ten = (_size % 10 == 0);' |
191             dbrow '_divisible_by_ten == 1' |
192             dbcol size mean divisible_by_ten
193              
194             Another approach is to use the C command to skip output of a row.
195             I.e., the equivalent:
196              
197             cat data.fsdb |
198             dbcolcreate divisible_by_ten |
199             dbroweval '_divisible_by_ten = (_size % 10 == 0); next row if (!_divisible_by_ten);' |
200             dbcol size mean divisible_by_ten
201              
202             However, neither of these approachs work very well when the output
203             is a I different schema.
204              
205             The recommended method for schema-changing commands is to write a full
206             filter, but a full filter is a bit heavy weight.
207             As an alternative, one can use the C<-m> option to request
208             manual configuration of the output, then use C<@out_args> to define
209             the output schema (it specifies the C arguments),
210             and C<$ofref> is the output row.
211             It may also reference <$in>, the input C argument,
212             and <$fref> as an aref to the current line.
213             Note that newly created columns I have underscore-names
214              
215             Thus a third equivalent is:
216              
217             cat data.fsdb | \
218             dbroweval -m -b '@out_args = ( -clone => $in, \
219             -cols => ($in->cols, divisible_by_ten); ' \
220             'my $div_by_10 = (_size % 10 == 0); \
221             $ofref = [ @$fref, $div_by_10 ] if ($div_by_ten);' |
222             dbcol size mean divisible_by_ten
223              
224             or
225              
226             cat data.fsdb | \
227             dbroweval -m -b '@out_args = ( -clone => $in, \
228             -cols => [qw(size mean divisible_by_ten)] ); ' \
229             'my $div_by_10 = (_size % 10 == 0); \
230             $ofref = [ _mean, _size, $div_by_10 ] if ($div_by_ten);'
231              
232              
233             Finally, one can write different a completely different schema, although
234             it's more work:
235              
236             cat data.fsdb | \
237             dbroweval -m -b '@out_args = (-cols => [qw(size n)]);' \
238             '$ofref = [ _size, 1 ];'
239              
240             writes different columns, and
241              
242             cat data.fsdb | \
243             dbroweval -n -m -b '@out_args = (-cols => [qw(n)]); \
244             my $count = 0;' -e '$ofref = [ $count ];' '$count++;'
245              
246             Is a fancy way to count lines.
247              
248             The begin code block should setup C<@out_args> to be the arguments to a
249             C call, and whatever is in C<$ofref>
250             (if anything) is written for each input line,
251             and once at the end.
252              
253             =head2 Command 3: Fun With Suppressing Output
254              
255             The C<-n> option suppresses default output.
256             Thus, a simple equivalent to F is:
257              
258             dbroweval -n -e '$ofref = $lfref;'
259              
260             Where C<$ofref> is the output fields,
261             which are copied from C<$lfref>, the hereby documented
262             internal representation of the last row.
263             Yes, this is a bit unappetizing, but,
264             in for a penny with C<$ofref>, in for a pound.
265              
266             =head2 Command 4: Extra Ouptut
267              
268             Calling C<&$write_fastpath_sub($fref)> will do extra output,
269             so this simple program will duplicate each line of input
270             (one extra output, plus one regular output for each line of input):
271              
272             dbroweval '&$write_fastpath_sub($fref)'
273              
274              
275             =head1 BUGS
276              
277             Handling of code in files isn't very elegant.
278              
279              
280             =head1 SEE ALSO
281              
282             L
283              
284              
285             =head1 CLASS FUNCTIONS
286              
287             =cut
288              
289             @ISA = qw(Fsdb::Filter);
290             ($VERSION) = 2.0;
291              
292 1     1   6524 use strict;
  1         2  
  1         27  
293 1     1   6 use Pod::Usage;
  1         1  
  1         74  
294              
295 1     1   6 use Fsdb::Support;
  1         1  
  1         27  
296 1     1   10 use Fsdb::Filter;
  1         1  
  1         17  
297 1     1   7 use Fsdb::IO::Reader;
  1         1  
  1         14  
298 1     1   4 use Fsdb::IO::Writer;
  1         1  
  1         1001  
299              
300             =head2 new
301              
302             $filter = new Fsdb::Filter::dbroweval(@arguments);
303              
304             =cut
305              
306             sub new {
307 0     0 1   my $class = shift @_;
308 0           my $self = $class->SUPER::new(@_);
309 0           bless $self, $class;
310 0           $self->set_defaults;
311 0           $self->parse_options(@_);
312 0           $self->SUPER::post_new();
313 0           return $self;
314             }
315              
316             =head2 set_defaults
317              
318             $filter->set_defaults();
319              
320             Internal: set up defaults.
321              
322             =cut
323              
324             sub set_defaults ($) {
325 0     0 1   my($self) = @_;
326 0           $self->SUPER::set_defaults();
327 0           $self->{_beg_code} = [];
328 0           $self->{_end_code} = [];
329 0           $self->{_code_files} = [];
330 0           $self->{_code_lines} = [];
331 0           $self->{_warnings} = undef;
332 0           $self->{_header} = undef;
333              
334 0           $self->{_no_output} = undef;
335 0           $self->{_no_output_even_comments} = undef;
336             }
337              
338             =head2 _confirm_ending_semicolon
339              
340             Not a method; but an internal routine to make sure code compiles.
341              
342             =cut
343              
344             sub _confirm_ending_semicolon(@) {
345 0     0     my($c) = @_;
346 0 0         $c = $c . ";" if ($c !~ /\;\s*$/);
347 0           return $c;
348             }
349              
350             =head2 parse_options
351              
352             $filter->parse_options(@ARGV);
353              
354             Internal: parse options
355              
356             =cut
357              
358             sub parse_options ($@) {
359 0     0 1   my $self = shift @_;
360              
361             $self->get_options(\@_,
362 0     0     'help|?' => sub { pod2usage(1); },
363 0     0     'man' => sub { pod2usage(-verbose => 2); },
364             'autorun!' => \$self->{_autorun},
365             'b|begin=s@' => $self->{_beg_code},
366             'close!' => \$self->{_close},
367             'd|debug+' => \$self->{_debug},
368             'e|end=s@' => $self->{_end_code},
369             'f|code-files=s@' => $self->{_code_files},
370             'header=s' => \$self->{_header},
371 0     0     'i|input=s' => sub { $self->parse_io_option('input', @_); },
372             'log!' => \$self->{_logprog},
373             'm|manual-output' => \$self->{_manual_output},
374             'n|no-output' => \$self->{_no_output},
375             'N|no-output-even-comments' => \$self->{_no_output_even_comments},
376 0     0     'o|output=s' => sub { $self->parse_io_option('output', @_); },
377             'saveoutput=s' => \$self->{_save_output},
378             'w|warnings!' => \$self->{_warnings},
379 0 0         ) or pod2usage(2);
380             # rest is code
381 0           foreach (@_) {
382 0           push(@{$self->{_code_lines}}, _confirm_ending_semicolon($_));
  0            
383             };
384             }
385              
386             =head2 setup
387              
388             $filter->setup();
389              
390             Internal: setup, parse headers.
391              
392             =cut
393              
394             sub setup ($) {
395 0     0 1   my($self) = @_;
396              
397             #
398             # handle files
399             #
400 0           foreach (@{$self->{_code_files}}) {
  0            
401 0 0         open(INF, "<$_") || die $self->{_prog} . ": cannot open ``$_''.\n";
402 0           push(@{$self->{_code_lines}}, _confirm_ending_semicolon(join('', )));
  0            
403 0           close INF;
404             };
405              
406             #
407             # set up reader
408             #
409 0           $self->{_out} = undef;
410 0           my @in_options = ();
411 0 0         if ($self->{_no_output_even_comments}) {
412 0           $self->{_no_output} = 1;
413 0           push(@in_options, -outputheader => 'never');
414             };
415             push(@in_options, (-comment_handler => $self->create_pass_comments_sub))
416 0 0         if (!$self->{_no_output_even_comments});
417 0 0         push(@in_options, -header => $self->{_header}) if (defined($self->{_header}));
418 0           $self->finish_io_option('input', @in_options);
419 0           my $read_fastpath_sub = $self->{_in}->fastpath_sub();
420              
421             #
422             # convert code to perl
423             #
424 0           my($PERL_CODE_F, $DB_CODE_A_F, $TITLE_F, $COMMAND_F) = (0..20); # names for the for parts of our foreach bit next:
425 0           $self->{_pretty_args} = "";
426 0           my($beg_code, $code, $end_code);
427 0           my $c;
428 0           my $any_needs_lfref = undef;
429 0           my $this_needs_lfref;
430 0           foreach my $iref ([\$beg_code, $self->{_beg_code}, "BEGIN CODE:", "-b"],
431             [\$code, $self->{_code_lines}, "CODE:", ""],
432             [\$end_code, $self->{_end_code}, "END_CODE:", "-e"]) {
433 0 0         next if ($#{$iref->[$DB_CODE_A_F]} < 0);
  0            
434 0           ($c, $this_needs_lfref) = $self->{_in}->codify(@{$iref->[$DB_CODE_A_F]});
  0            
435 0           ${$iref->[$PERL_CODE_F]} = $c;
  0            
436 0 0         $any_needs_lfref = 1 if ($this_needs_lfref);
437 0           my($code) = join("\n", @{$iref->[$DB_CODE_A_F]});
  0            
438 0 0         $any_needs_lfref = 1 if ($code =~ /lfref/);
439             $self->{_pretty_args} .= " $iref->[$COMMAND_F] " .
440 0           Fsdb::Support::shell_quote("{ " . Fsdb::Support::code_prettify(@{$iref->[$DB_CODE_A_F]}) . " }");
  0            
441 0 0         print STDERR "$iref->[$TITLE_F]:\n$c\n" if ($self->{_debug});
442             };
443 0 0         exit 1 if ($self->{_debug} == 1);
444              
445             #
446             # write the loop
447             # xxx: should be able to optimize away $lfref
448             #
449             {
450 0           my $loop_sub;
  0            
451 0           my $in_ncols = $#{$self->{_in}->cols} + 1;
  0            
452 0           my @out_args = ();
453 0           my $row_output_code = '';
454 0           my $output_end_pre = q'
455             $ofref = undef; # reset for any finishing-up output
456             ';
457 0           my $output_end_post = q'
458             &$write_fastpath_sub($ofref) if (defined($ofref));
459             ';
460 0 0         if ($self->{_manual_output}) {
461 0           $row_output_code = 'if (defined($ofref)) { &$write_fastpath_sub($ofref); $ofref = undef; };';
462             } else {
463 0           @out_args = (-clone => $self->{_in});
464 0 0         $row_output_code = ($self->{_no_output}) ? '' :
465             '&$write_fastpath_sub($fref);';
466             };
467             my $loop_code = q'
468             my $selfref = \$self;
469             $loop_sub = sub {
470             ' .
471 0 0         ($self->{_warnings} ? "" : "no strict 'vars';\n") . q'
    0          
    0          
    0          
    0          
472             my $fref;
473             my $lfref;
474             my $ofref = undef;
475             my $write_fastpath_sub;
476             my $in = $self->{_in};
477             # begin user BEGIN CODE
478             ' . (defined($beg_code) ? $beg_code : '') . q'
479             # end user BEGIN CODE
480             ${$selfref}->finish_io_option("output", @out_args);
481             $write_fastpath_sub = ${$selfref}->{_out}->fastpath_sub();
482             row: # let users say "next row;"
483             while ($fref = &$read_fastpath_sub()) {
484             # begin user MAINLINE CODE
485             ' . (defined($code) ? $code : '') . q'
486             # end user MAINLINE CODE
487             ' . ($any_needs_lfref ? q'
488             ' : '') . $row_output_code . q'
489             } continue {
490             $lfref = $fref; # save for next pass
491             };
492             ' . $output_end_pre . q'
493             # begin user END CODE
494             ' . (defined($end_code) ? $end_code : '') . q'
495             # end user END CODE
496             ' . $output_end_post . q'
497             };
498             ';
499             print "\nLOOP CODE:\n$loop_code\n"
500 0 0         if ($self->{_debug} >= 2);
501 0           eval $loop_code;
502 0 0         $@ && die $self->{_prog} . ": eval error compiling user-provided code: $@.\n: CODE:\n$loop_code\n";
503 0           $self->{_loop_sub} = $loop_sub;
504             };
505 0           $self->{_beg_code_final} = $beg_code;
506 0           $self->{_end_code_final} = $end_code;
507 0           $self->{_code_final} = $code;
508             }
509              
510             =head2 run
511              
512             $filter->run();
513              
514             Internal: run over all IO
515              
516             =cut
517             sub run ($) {
518 0     0 1   my($self) = @_;
519 0           &{$self->{_loop_sub}}();
  0            
520             }
521              
522              
523             =head2 finish
524              
525             $filter->finish();
526              
527             Internal: write trailer.
528              
529             =cut
530             sub finish($) {
531 0     0 1   my($self) = @_;
532 0 0         return if ($self->{_no_output_even_comments});
533 0           $self->SUPER::finish();
534             }
535              
536              
537             =head2 compute_program_log
538              
539             $log = $filter->figure_program_log();
540              
541             Override compute_program_log to do pretty-printed arguments.
542              
543             =cut
544              
545             sub compute_program_log($) {
546 0     0 1   my $self = shift @_;
547              
548 0           my $log = " | " . $self->{_prog} . $self->{_pretty_args};
549 0           return $log;
550             }
551              
552              
553              
554             =head1 AUTHOR and COPYRIGHT
555              
556             Copyright (C) 1991-2017 by John Heidemann
557              
558             This program is distributed under terms of the GNU general
559             public license, version 2. See the file COPYING
560             with the distribution for details.
561              
562             =cut
563              
564             1;