File Coverage

blib/lib/Fsdb/Filter.pm
Criterion Covered Total %
statement 29 267 10.8
branch 0 148 0.0
condition 0 60 0.0
subroutine 10 38 26.3
pod 25 25 100.0
total 64 538 11.9


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # Filter.pm
5             # $Id: 7843a6bb9a62b736e670fcf61fdb3e66994cf79f $
6             #
7             # Copyright (C) 2007-2008 by John Heidemann
8             #
9             # This program is free software; you can redistribute it and/or
10             # modify it under the terms of the GNU General Public License
11             # version 2, as published by the Free Software Foundation.
12             #
13             # This program is distributed in the hope that it will be useful,
14             # but WITHOUT ANY WARRANTY; without even the implied warranty of
15             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16             # GNU General Public License for more details.
17             #
18             # You should have received a copy of the GNU General Public License along
19             # with this program; if not, write to the Free Software Foundation, Inc.,
20             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21             #
22              
23              
24             package Fsdb::Filter;
25              
26             =head1 NAME
27              
28             Fsdb::Filter - base class for Fsdb filters
29              
30             =head1 DESCRIPTION
31              
32             Fsdb::Filter is the virtual base class for Fsdb filters.
33              
34             Users will typically invoke individual programs via the command line
35             (for example, see L)
36             or string together several in a Perl program
37             as described in L.
38              
39             For new Filter developers, internal processing is:
40              
41             new
42             set_defaults
43             parse_options
44             autorun if desired
45             parse_options # optionally called additional times
46             setup # does IO on header
47             run # does IO on data
48             finish # any shutdown
49              
50             In addition, the C method returns metadata about a given filter.
51              
52             =head1 FUNCTIONS
53              
54             =cut
55              
56             @ISA = ();
57             ($VERSION) = 1.0;
58              
59 2     2   11167 use strict;
  2         4  
  2         64  
60 2     2   46 use 5.010;
  2         6  
61 2     2   7 use Carp qw(carp croak);
  2         3  
  2         143  
62 2     2   1223 use IO::Handle;
  2         11468  
  2         93  
63 2     2   1154 use IO::File;
  2         3617  
  2         262  
64 2     2   1553 use Getopt::Long qw(GetOptionsFromArray :config no_ignore_case bundling permute);
  2         18755  
  2         25  
65              
66 2     2   1620 use Fsdb::IO::Reader;
  2         8  
  2         76  
67 2     2   1162 use Fsdb::IO::Writer;
  2         7  
  2         88  
68 2     2   1436 use Fsdb::Support;
  2         6  
  2         117  
69 2     2   1272 use Fsdb::Support::DelayPassComments;
  2         6  
  2         6885  
70              
71             =head2 new
72              
73             $fsdb = new Fsdb::Filter;
74              
75             Create a new filter object, calling set_defaults
76             and parse_options. A user program will call a specific
77             filter (say Fsdb::Filter::dbcol) to do processing.
78             See also L for aliases that remove the wordiness.
79              
80             =cut
81              
82             sub new {
83 0     0 1   my ($class) = @_;
84 0           my $self = bless {}, $class;
85 0           $self->set_defaults;
86 0           return $self;
87             }
88              
89             =head2 post_new
90              
91             $filter->post_new();
92              
93             Called when the subclass is done with new,
94             giving Fsdb::Filter a chance to autorun.
95              
96             =cut
97              
98             sub post_new {
99 0     0 1   my $self = shift @_;
100             $self->setup_run_finish()
101 0 0         if ($self->{_autorun});
102             }
103              
104              
105             =head2 set_defaults
106              
107             $filter->set_defaults();
108              
109             Set up object defaults.
110             Called once during new.
111              
112             Fsdb::Filter::set_defaults does some general setup,
113             tracking module invocation and preparing for one input and output stream.
114              
115              
116             =cut
117              
118             sub set_defaults ($) {
119 0     0 1   my $self = shift @_;
120              
121             # see the info() method for documentation
122             $self->{_info} = {
123 0           input_type => 'fsdb*',
124             input_count => 1,
125             output_type => 'fsdb*',
126             output_count => 1,
127             };
128              
129 0           my($package, $filename, $line) = caller(1);
130 0           $filename =~ s@^.*/@@g; # strip junk
131 0           $filename =~ s@\.pm$@@; # strip junk
132 0           $self->{_prog} = $filename;
133 0           $self->{_orig_argv} = undef;
134 0           $self->{_logprog} = 1;
135 0           $self->{_close} = 1;
136 0           $self->{_save_output} = undef;
137 0           $self->{_empty} = '-';
138              
139 0           $self->{_debug} = 0;
140 0           $self->{_error} = undef;
141              
142             # Fsdb::IO objects
143 0           $self->{_in} = undef;
144 0           $self->{_out} = undef;
145             # filehandles or Fsdb::BoundedQueue:
146             #
147             # These next two lines cause obnoxious warnings in some versions
148             # $self->{_input} = \*STDIN;
149             # $self->{_output} = \*STDOUT;
150             # of perl. in 5.10.1:
151             #
152             # Unbalanced string table refcount: (2) for "blib/script/dbjoin" during global destruction.
153             # Unbalanced string table refcount: (1) for "STDOUT" during global destruction.
154             # Unbalanced string table refcount: (1) for "STDIN" during global destruction.
155             # Scalars leaked: 2
156             # Unbalanced string table refcount: (2) for "blib/script/dbjoin" during global destruction.
157             # Unbalanced string table refcount: (1) for "STDOUT" during global destruction.
158             # Unbalanced string table refcount: (1) for "STDIN" during global destruction.
159             # Scalars leaked: 2
160             #
161             # fix: switch to magic variables '-' and catch them later in finish_fh_io_option.
162             #
163 0           $self->{_input} = '-';
164 0           $self->{_output} = '-';
165              
166 0           $self->{_autorun} = undef;
167             };
168              
169             =head2 set_default_tmpdir
170              
171             $filter->set_default_tmpdir
172              
173             Figure out a tmpdir, from environment variables if necessary.
174              
175             =cut
176              
177             sub set_default_tmpdir($;$) {
178 0     0 1   my $self = shift @_;
179              
180 0           foreach ($_[0], $ENV{'TMPDIR'}, "/tmp") {
181 0 0         if (defined($_)) {
182 0           $self->{_tmpdir} = $_;
183 0           return;
184             };
185             };
186 0           die "internal error in set_default_tmpdir";
187             }
188              
189             =head2 parse_options
190              
191             $filter->parse_options(@ARGV);
192              
193             Parse_options is called one I times to parse ARGV-style options.
194             It should not do any IO or any irreverssable actions; defer those to startup.
195              
196             Fsdb::Filter::parse_options does no work; the subclass is expected
197             to call Fsdb::Filter::get_options() for all arguments.
198              
199             Most modules implement certain common fsdb options, listed below.
200              
201             =for comment
202             (Note that this text in Fsdb::Filter.pm is the master copy,
203             replicated in to all the module.pm files via C.)
204              
205             =for comment
206             begin_standard_fsdb_options
207              
208             This module also supports the standard fsdb options:
209              
210             =over 4
211              
212             =item B<-d>
213              
214             Enable debugging output.
215              
216             =item B<-i> or B<--input> InputSource
217              
218             Read from InputSource, typically a file name, or C<-> for standard input,
219             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
220              
221             =item B<-o> or B<--output> OutputDestination
222              
223             Write to OutputDestination, typically a file name, or C<-> for standard output,
224             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
225              
226             =item B<--autorun> or B<--noautorun>
227              
228             By default, programs process automatically,
229             but Fsdb::Filter objects in Perl do not run until you invoke
230             the run() method.
231             The C<--(no)autorun> option controls that behavior within Perl.
232              
233             =item B<--noclose>
234              
235             By default, programs close their output when done.
236             For some cases where objects are used internally, C<--noclose>
237             may be used to leave output open for further I/O.
238             (This option is only supported by some filters.)
239              
240             =item B<--saveoutput $OUT_REF>
241              
242             By default, programs close their output when done.
243             With this option, programs in Perl can have a subprogram create
244             an output refrence and return it to the caller in C<$OUT_REF>.
245             The caller can then use it for further I/O.
246             (This option is only supported by some filters.)
247              
248             =item B<--help>
249              
250             Show help.
251              
252             =item B<--man>
253              
254             Show full manual.
255              
256             =back
257              
258             =for comment
259             end_standard_fsdb_options
260              
261              
262             =cut
263              
264             sub parse_options ($@) {
265 0     0 1   my $self = shift @_;
266 0           die "Fsdb:Filter: we expect the subprogram to handle parse_options.\n";
267             }
268              
269             =head2 parse_target_column
270              
271             $self->parse_target_column(\@argv);
272              
273             A helper function: allow one column to be specified as the C<_target_column>.
274              
275             =cut
276              
277             sub parse_target_column($$) {
278 0     0 1   my($self, $argv_ref) = @_;
279              
280 0 0 0       if ($#{$argv_ref} > 0 || $#{$argv_ref} == 0 && defined($self->{_target_column})) {
  0 0 0        
  0            
281             # xxx: next line is "die" because with perl v5.8.8,
282             # I get this error message with croak:
283             # Bizarre copy of HASH in sassign at /usr/lib/perl5/5.8.8/Carp/Heavy.pm line 45.
284 0           die $self->{_prog} . ": cannot specify multiple columns\n";
285 0           } elsif ($#{$argv_ref} == 0) {
286 0           $self->{_target_column} = $argv_ref->[0];
287             };
288             }
289              
290             =head2 get_options
291              
292             $success = $filter->get_options(\@argv, "v+" => \$verbose, ...)
293              
294             get_options is just like Getopt::Long's GetOptions,
295             but takes the argument list as the first argument.
296             This list is modified and any non-options are returned.
297             It also saves _orig_argv in itself.
298              
299             =cut
300              
301             sub get_options($$@) {
302 0     0 1   my $self = shift @_;
303 0           my $argv_ref = shift @_;
304 0           my @opt_specs = @_;
305             # hacky interface to GetOptions, we have to copy to and from ARGV
306 0           foreach my $p (@$argv_ref) {
307 0           my $p_copy = $p;
308 0 0         if (ref($p_copy) =~ /\:\:/) {
309             # remove the internal memory cruft for internal perl objects
310 0           $p_copy = ref($p_copy);
311 0           $p_copy =~ s/\=.*$//;
312 0 0         $p_copy = "::FsdbIPC" if ($p_copy =~ /^(Fsdb::BoundedQueue|IO::Pipe)/);
313 0           $p_copy = "[$p_copy]";
314             };
315 0           push (@{$self->{_orig_argv}}, $p_copy);
  0            
316             };
317 0           my $result = GetOptionsFromArray($argv_ref, @opt_specs);
318 0           return $result;
319             }
320              
321             =head2 parse_sort_option
322              
323             $fsdb_io = $filter->parse_sort_option($option_name, $target);
324              
325             This helper function handles sorting options and column names
326             as described in L. We normalize long sort options
327             to unbundled short options
328             and accumulate them in $self->{_sort_argv}.
329              
330             =cut
331              
332             sub parse_sort_option ($$$) {
333 0     0 1   my $self = shift @_;
334 0           my ($option_name, $target) = @_;
335              
336 0           my $sort_aref = $self->{_sort_argv};
337 0 0 0       if ($option_name eq '<>') {
    0 0        
    0 0        
    0 0        
    0          
338 0           push (@$sort_aref, $target);
339             } elsif ($option_name eq 'n' || $option_name eq 'numeric') {
340 0           push (@$sort_aref, '-n');
341             } elsif ($option_name eq 'N' || $option_name eq 'lexical') {
342 0           push (@$sort_aref, '-N');
343             } elsif ($option_name eq 'r' || $option_name eq 'descending') {
344 0           push (@$sort_aref, '-r');
345             } elsif ($option_name eq 'R' || $option_name eq 'descending') {
346 0           push (@$sort_aref, '-R');
347             } else {
348 0           croak "parse_sort_option: unknown option $option_name for target $target\n";
349             };
350             }
351              
352              
353              
354             =head2 parse_io_option
355              
356             $fsdb_io = $filter->parse_io_option($io_direction, $option_name, $target);
357              
358             This helper function handles C<--input> or C<--output> options,
359             without doing any setup.
360              
361             It fills in $self->{_$IO_DIRECTION} with the resulting object,
362             which is either a file handle or Fsdb::Filter::Piepline object,
363             and expects C to convert
364             this token into a full Fsdb::IO object.
365              
366             $IO_DIRECTION is usually input or output, but it can also be inputs
367             (with an "s") when multiple input sources are allowed.
368              
369             =cut
370              
371             sub parse_io_option ($$$$) {
372 0     0 1   my $self = shift @_;
373 0           my $direction = shift @_;
374 0           my $option_name = shift @_;
375             # Next line wackiness: perl-5.10's Getopt::Long passes an object, not a
376             # string, so force it to be a string.
377 0 0         $option_name = "$option_name" if (ref($option_name));
378 0           my $target = shift @_;
379 0 0         $target = $option_name if (!defined($target)); # xxx: sometimes we seem to loose an argument (bug in Getopt::Long?)
380              
381 0 0         my($mode) = $direction eq 'output' ? "w" : "r";
382              
383 0           my $token = $target;
384              
385 0 0 0       if ($direction eq 'input' || $direction eq 'output') {
    0 0        
386 0           $self->{"_$direction"} = $token;
387             } elsif ($direction eq 'inputs' || $direction eq 'outputs') {
388 0           push (@{$self->{"_$direction"}}, $token);
  0            
389             } else {
390 0           croak "internal error: bad direction";
391             };
392             }
393              
394              
395             =head2 finish_one_io_option
396              
397             $fsdb_io = $filter->finish_io_option($io_direction, $token, @fsdb_args);
398              
399             This helper function
400             finishes setting up a Fsdb::IO object in $IO_DIRECTION,
401             using $TOKEN as information.
402             using @FSDB_ARGS as parameters.
403             It creates the actual Fsdb::IO objects, opens the files (or whatever),
404             and reads the headers.
405             It returns the $FSDB_IO option.
406              
407             $IO_DIRECTION must be "input" or "output".
408              
409             Since it does IO, finish_io_option should only be called from setup,
410             not parse_options.
411              
412             Can be called once per IO stream.
413              
414             =cut
415              
416             sub finish_one_io_option ($$$@) {
417 0     0 1   my $self = shift @_;
418 0           my $direction = shift @_;
419 0           my $token = shift @_;
420              
421 0           my $fsdb;
422             # fast return a raw fh, if that's an option (-raw_fh=>1 in @fsdb_args)
423 0 0 0       if ($#_ >= 1 && $_[0] eq '-raw_fh' && $_[1]) {
      0        
424 0           return $token;
425             };
426 0 0         if (ref($token) =~ /^Fsdb::IO/) {
427             # assume the user gave us a good one
428 0           $fsdb = $token;
429             } else {
430 0           my $token_ref = ref($token);
431 0           my $token_type = undef;
432 0 0 0       if ($token_ref =~ /^Fsdb::BoundedQueue/) {
    0 0        
    0          
    0          
    0          
433 0           $token_type = '-queue';
434             } elsif ($token_ref =~ /^IO::/) {
435 0           $token_type = '-fh';
436             } elsif ($token_ref eq '' && $token eq '-') {
437 0           $token_type = '-file';
438             } elsif ($token_ref eq '' && $token =~ /^\*main/) {
439 0           $token_type = '-fh';
440             } elsif ($token_ref eq '') {
441 0           $token_type = '-file'; # assume filename
442             } else {
443 0           croak "unknown token type in Fsdb::Filter::finish_one_io_option\n";
444             };
445 0 0         if ($direction eq 'input') {
    0          
446 0           $fsdb = new Fsdb::IO::Reader($token_type => $token, @_);
447             } elsif ($direction eq 'output') {
448 0           $fsdb = new Fsdb::IO::Writer($token_type => $token, @_);
449             } else {
450 0           croak "unknown direction in Fsdb::Filter::finish_one_io_option\n"; };
451             };
452             # Next line is die, not croak, because croak always appends line number.
453 0 0         $fsdb->error and die $self->{_prog} . ": cannot open $direction: " . $fsdb->error . "\n";
454 0           return $fsdb;
455             }
456              
457             =head2 finish_io_option
458              
459             $filter->finish_io_option($io_direction, @fsdb_args);
460              
461             This helper function
462             finishes setting up a Fsdb::IO object in $IO_DIRECTION,
463             using @FSDB_ARGS as parameters.
464             It creates the actual Fsdb::IO objects, opens the files (or whatever),
465             and reads the headers.
466             the resulting Fsdb::IO objects are
467             built from C<$self->{_$IO_DIRECTION}>
468             and are left in C<$self->{_in}> or (C<_out> or C<@_ins>).
469              
470             $IO_DIRECTION must be "input", "inputs" or "output".
471              
472             Since it does IO, finish_io_option should only be called from setup,
473             not parse_options.
474              
475             Can be called once per IO stream.
476              
477             No return value.
478              
479             =cut
480              
481             sub finish_io_option ($$@) {
482 0     0 1   my $self = shift @_;
483 0           my $direction = shift @_;
484              
485             # special case multiple inputs
486 0 0         if ($direction eq 'inputs') {
487 0           my $token;
488 0           foreach $token (@{$self->{_inputs}}) {
  0            
489 0           my $in = $self->finish_one_io_option('input', $token, @_);
490 0           push @{$self->{_ins}}, $in;
  0            
491             };
492 0           return;
493             };
494              
495             # single
496 0           my $fsdb = $self->finish_one_io_option($direction, $self->{"_$direction"}, @_);
497 0 0         $self->{($direction eq 'input' ? '_in' : '_out')} = $fsdb;
498             }
499              
500             =head2 direction_to_stdio
501              
502             $fh = direction_to_stdio($direction)
503              
504             Private internal routing. Give a filehandle for STDIN or
505             STDOUT based on $DIRECTION == 'input or 'output'
506              
507             =cut
508              
509             sub direction_to_stdio($;$){ # private
510 0     0 1   my($direction, $encoding) = @_;
511 0           my $fh = new IO::Handle;
512 0 0         if ($direction eq 'input') {
    0          
513 0           $fh = $fh->fdopen(fileno(STDIN), "r");
514             } elsif ($direction eq 'output') {
515 0           $fh = $fh->fdopen(fileno(STDOUT), "w");
516             } else {
517 0           croak "bad direction";
518             };
519 0 0         $encoding = ":utf8" if (!defined($encoding));
520 0           binmode $fh, $encoding;
521 0           return $fh;
522             }
523              
524              
525             =head2 finish_fh_io_option
526              
527             $filter->finish_fh_io_option($io_direction);
528              
529             This helper function
530             creates a filehandle in $IO_DIRECTION.
531             Compare to finish_io_option which creates a Fsdb::IO object.
532             It creates the actual IO::File objects, opens the files (or whatever).
533             The filehandle is
534             built from C<$self->{_$IO_DIRECTION}>
535             and are left in C<$self->{_in}> or (C<_out>).
536              
537             $IO_DIRECTION must be "input" or "output".
538              
539             This function does no IO.
540              
541             No return value.
542              
543             =cut
544              
545             sub finish_fh_io_option ($$;$) {
546 0     0 1   my($self, $direction, $encoding) = @_;
547 0 0 0       croak "finish_fh_io_option: bad direction $direction\n"
548             if (!($direction eq 'input' || $direction eq 'output'));
549 0           my $token = $self->{"_$direction"};
550              
551 0           my $fh;
552 0 0         if (ref($token) =~ /^Fsdb::IO/) {
    0          
    0          
    0          
553 0           croak "finish_fh_io_option: expected IO::Handle but got Fsdb::IO object.\n";
554             } elsif (ref($token) =~ /^Fsdb::BoundedQueue/) {
555 0           croak "finish_fh_io_option: doesn't currently handle Fsdb::BoundedQueue objects.\n";
556             } elsif (ref($token) =~ /^IO::/) {
557             # assume we got a good one passed in
558 0           $fh = $token;
559             } elsif (ref($token) eq '') {
560             # (ref($token) == '' if $token == *main::STDIN or $token eq '-'
561 0 0         if ($token eq '-') {
562 0           $fh = direction_to_stdio($direction, $encoding);
563             } else {
564             # assume it's a glob to a filehandle
565 0           $fh = $token;
566             };
567             } else {
568 0 0         $fh = new IO::File($token, ($direction eq 'input' ? 'r' : 'w'));
569 0 0         $encoding = ":utf8" if (!defined($encoding));
570 0           binmode $fh, $encoding;
571             };
572              
573 0 0         $self->{($direction eq 'input' ? '_in' : '_out')} = $fh;
574             }
575              
576             =head2 setup
577              
578             $filter->setup();
579              
580             Do any setup that requires minimal IO
581             (for example, reading and parsing headers).
582              
583             Called exactly once.
584              
585             =cut
586              
587             sub setup ($) {
588 0     0 1   my $self = shift @_;
589 0           die "Fsdb:Filter: we expect the subprogram to handle setup.\n";
590             }
591              
592             =head2 run
593              
594             $filter->run();
595              
596             Execute the body, typically iterating over the input rows.
597              
598             Called exactly once.
599              
600             =cut
601              
602             sub run ($) {
603 0     0 1   my $self = shift @_;
604 0           die "Fsdb:Filter: we expect the subprogram to handle run.\n";
605             }
606              
607             =head2 compute_program_log
608              
609             $log = $filter->figure_program_log();
610              
611             Compute and return the log entry for a program.
612              
613             =cut
614              
615             sub compute_program_log($) {
616 0     0 1   my $self = shift @_;
617              
618 0           my $args = '';
619             # most refs were cleaned in Fsdb::Filter::get_options
620 0 0 0       if (defined($self->{_orig_argv}) && $#{$self->{_orig_argv}} != -1) {
  0            
621 0           foreach (@{$self->{_orig_argv}}) {
  0            
622 0 0         if (ref($_) eq 'CODE') {
    0          
623 0           $args .= " [ANONYMOUS-CODE]";
624             } elsif ($_ =~ /\s/) {
625             # should use String::ShellQuote, but don't want the dpeendency
626 0           $_ =~ s/\'/'\\\''/g;
627 0           $args .= " '" . $_ . "'";
628             } else {
629 0           $args .= " " . $_;
630             };
631             };
632             };
633 0           my $log = "";
634 0           $log = " | " . $self->{_prog} . $args;
635 0           return $log;
636             }
637              
638             =head2 finish
639              
640             $filter->finish();
641              
642             Write out any trailing comments and close output.
643              
644             =cut
645              
646             sub finish($) {
647 0     0 1   my $self = shift @_;
648              
649 0 0         if (!defined($self->{_out})) {
650 0           my $problems = '';
651 0 0         $problems .= "delay_comments " if (defined($self->{_delay_comments}));
652 0 0         $problems .= "logprog " if ($self->{_logprog});
653 0 0         $problems .= "save_output " if (defined($self->{_save_output}));
654 0 0         carp "finish with no _out object and $problems\n"
655             if ($problems ne '');
656             };
657              
658 0 0         if (defined($self->{_delay_comments})) {
659 0           foreach (@{$self->{_delay_comments}}) {
  0            
660 0           $_->flush($self->{_out});
661             };
662             };
663              
664 0 0         if ($self->{_logprog}) {
665             # ick, OO programming with broken objects...
666 0 0 0       if (ref($self->{_out}) eq '' || ref($self->{_out}) =~ /^IO::/) {
667 0           $self->{_out}->print("# " . $self->compute_program_log() . "\n");
668             } else {
669 0           $self->{_out}->write_comment($self->compute_program_log());
670             };
671             };
672 0           ${$self->{_save_output}} = $self->{_out}
673 0 0         if (defined($self->{_save_output}));
674             $self->{_out}->close
675 0 0 0       if ($self->{_close} && defined($self->{_out}) && !defined($self->{_save_output}));
      0        
676             }
677              
678             =head2 setup_run_finish
679              
680             $filter->setup_run_finish();
681              
682             Shorthand for doing everything needed to run a command straightaway.
683              
684             =cut
685              
686             sub setup_run_finish ($) {
687 0     0 1   my $self = shift @_;
688 0           $self->setup();
689 0           $self->run();
690 0           $self->finish();
691             }
692              
693             =head2 info
694              
695             $filter->info($INFOTYPE)
696              
697             Return information about what the filter does.
698             Infotypes:
699              
700             =over 4
701              
702             =item input_type
703             Types of input accepted.
704             Raw types are: "fsdbtext", "fsdbobj", "fsdb*", "text", or "none".
705              
706             =item output_type
707             Type of output produced.
708             Same format as input_type.
709              
710             =item input_count
711             Number of input streams (usually 1).
712              
713             =item output_count
714             Number of input streams (usually 1).
715              
716             =back
717              
718             =cut
719              
720             sub info ($) {
721 0     0 1   my $self = shift @_;
722 0           my $key = shift @_;
723 0           return $self->{_info}{$key};
724             }
725              
726              
727             =head1 CLASS-SPECIFIC UTILITY ROUTINES
728              
729             Filter has some class-specific utility routines in it.
730             (I.e., they know about $self.)
731              
732             =head2 create_pass_comments_sub
733              
734             $filter->create_pass_comments_sub
735             or
736             $filter->create_pass_comments_sub('_VALUE');
737              
738             Creates a code block suitable for passing to C
739             C<-comment_handler>
740             that passes comments through to C<$self->{_out}>.
741             Or with the optional argument, through C<$self->{_VALUE}>.
742              
743             =cut
744              
745             sub create_pass_comments_sub ($;$)
746             {
747 0     0 1   my $self = shift @_;
748 0   0       my($value) = $_[0] // '_out';
749             # one extra level of indirection to allow for delayed opening of _out
750 0     0     return sub { $self->{$value}->write_raw(@_); };
  0            
751             }
752              
753             =head2 create_tolerant_pass_comments_sub
754              
755             $filter->create_tolerant_pass_comments_sub
756             or
757             $filter->create_tolerant_pass_comments_sub('_VALUE');
758              
759             Like C<$self->create_pass_comments_sub>,
760             but this version tolerates the output not being opened.
761             In those cases, comments are discarded.
762             I use carefully to guarantee consistent results.
763              
764             A symptom requiring tolerance is to get an error like
765             "Can't call method "write_raw" on an undefined value at /usr/lib/perl5/vendor_perl/5.10.0/Fsdb/Filter.pm line 678."
766             (which will be the sub create_pass_comments_sub ($;$) line in create_pass_comments.)
767              
768              
769             =cut
770              
771             sub create_tolerant_pass_comments_sub ($;$)
772             {
773 0     0 1   my $self = shift @_;
774 0   0       my($value) = $_[0] // '_out';
775             # print STDERR "## create_tolerant_pass_comments_sub on $value, " . ref($self->{$value}) . "\n";
776             # one extra level of indirection to allow for delayed opening of _out
777             return sub {
778             $self->{$value}->write_raw(@_)
779 0 0   0     if (defined($self->{$value}));
780 0           };
781             }
782              
783             =head2 create_delay_comments_sub
784              
785             $filter->create_delay_comments_sub($optional_value);
786              
787             Creates a code block suitable for passing to Fsdb::IO::Readers -comment_handler
788             that will buffer comments for automatic (from $self->final) after all other IO.
789             No output occurs until finish() is called,
790             at which time C<$self-E{_out}> must be a live Fsdb object.
791              
792             =cut
793              
794             sub create_delay_comments_sub ($;$) {
795 0     0 1   my $self = shift @_;
796 0   0       my($value) = $_[0] // '_out';
797 0           my $dpc = new Fsdb::Support::DelayPassComments(\$self->{$value});
798 0           push (@{$self->{_delay_comments}}, $dpc);
  0            
799 0     0     return sub { $dpc->enqueue(@_); };
  0            
800             }
801              
802              
803             =head2 create_compare_code
804              
805             $filter->create_compare_code($a_fsdb, $b_fsdb, $a_fref_name, $b_fref_name).
806              
807             Write compare code based on sort-style options
808             stored in C<$self->{_sort_argv}>.
809             C<$A_FSDB> and C<$B_FSDB> are the L object that defines the schemas
810             for the two objects.
811             We assume the variables C<$a> and C<$b> point to arefs;
812             these names can be overridden by specifying
813             C<$A_FREF_NAME> and C<$B_FREF_NAME>.
814              
815             Returns undef if there are no fields in C<$self->{_sort_argv}>.
816             =cut
817              
818             sub create_compare_code ($$;$$) {
819 0     0 1   my($self, $a_fsdb, $b_fsdb, $a_name, $b_name) = @_;
820 0 0         $a_name = 'a' if (!defined($a_name));
821 0 0         $b_name = 'b' if (!defined($b_name));
822              
823             #
824             # A word about the 'no warnings "numeric"' bit:
825             # we want to compare numeric data with <=>,
826             # but that emits warnings for our empty value "-".
827             # We COULD filter that in Perl, but all the checking would make
828             # it much, much slower, and the Perl core has to check anyway.
829             # It turns out, <=> does The Right Thing,
830             # in that (any non-numeric) == (any non-numeric)
831             # and (any non-numeric) < (any numeric).
832             # So we just turn off warnings.
833             # But. Just. Here.
834             #
835 0           my $compare_code = "sub {\n" .
836             "\tno warnings \"numeric\";\n" .
837             "\t\treturn\n";
838 0           my ($reverse, $numeric) = (0, 0);
839 0           my $arg;
840 0           my $fields_found = 0;
841 0           foreach $arg (@{$self->{_sort_argv}}) {
  0            
842 0 0         if ($arg eq '-r') {
    0          
    0          
    0          
    0          
843 0           $reverse = 1;
844             } elsif ($arg eq '-R') {
845 0           $reverse = 0;
846             } elsif ($arg eq '-n') {
847 0           $numeric = 1;
848             } elsif ($arg eq '-N') {
849 0           $numeric = 0;
850             } elsif ($arg =~ /^-/) {
851 0           croak $self->{_prog} . ": internal error: unknown option $arg in sort key\n";
852             } else {
853 0 0         my ($left) = ($reverse ? $b_name : $a_name);
854 0 0         my ($right) = ($reverse ? $a_name : $b_name);
855 0           my $left_coli = $a_fsdb->col_to_i($arg);
856 0           my $right_coli = $b_fsdb->col_to_i($arg);
857 0 0         if ($reverse) {
858 0           my $tmp_coli = $left_coli;
859 0           $left_coli = $right_coli;
860 0           $right_coli = $tmp_coli;
861             };
862 0 0 0       croak $self->{_prog} . ": unknown column name $arg in sort key\n"
863             if (!defined($left_coli) || !defined($right_coli));
864 0 0         $compare_code .= "\t" . '($' . $left . '->[' . $left_coli . '] ' .
    0          
    0          
865             ($numeric ? "<=>" : "cmp") .
866             ' $' . $right . '->[' . $right_coli . ']) || ' .
867             ' # ' . $arg .
868             ($reverse ? ", descending" : ", ascending") .
869             ($numeric ? " numeric" : " lexical") .
870             "\n";
871             # note that we don't currently handle NaN comparisons returning undef
872 0           $fields_found++;
873             };
874             };
875 0           $compare_code .= "\t0; # match\n};\n";
876 0 0         return undef if ($fields_found == 0);
877 0           return $compare_code;
878             }
879              
880             =head2 numeric_formatting
881              
882             $out = $self->numeric_formatting($x)
883              
884             Display a floating point number $x using $self->{_format},
885             handling possible non-numeric "-" as a special case.
886              
887             =cut
888              
889             sub numeric_formatting {
890 0     0 1   my ($self, $x) = @_;
891 0 0         return $x if ($x eq '-');
892 0           return sprintf($self->{_format}, $x);
893             }
894              
895             =head2 setup_exactly_two_inputs
896              
897             $self->setup_exactly_two_inputs
898              
899             Ensure that there are exactly two input streams.
900             Common to L and L.
901              
902             =cut
903              
904             sub setup_exactly_two_inputs {
905 0     0 1   my($self) = @_;
906 0 0         if ($#{$self->{_inputs}} == -1) {
  0            
907 0           croak $self->{_prog} . ": too few input sources specified, use --input.\n";
908             };
909 0 0         if ($#{$self->{_inputs}} > 1) {
  0            
910 0           croak $self->{_prog} . ": too input sources specified, dbmerge only hanldes two at once.\n";
911             };
912 0 0         if ($#{$self->{_inputs}} == 0) {
  0            
913             # need to use stdin?
914             # my $token = new IO::Handle;
915             # $token->fdopen(fileno(STDIN), "r");
916             # unshift @{$self->{_inputs}}, $token;
917 0           unshift @{$self->{_inputs}}, '-';
  0            
918             };
919 0 0         croak if ($#{$self->{_inputs}} != 1); # assert
  0            
920             }
921              
922              
923             =head1 NON-CLASS UTILITY ROUTINES
924              
925             Filter also has some utility routines that are not part of the class structure.
926             They are not exported.
927              
928             (none currently)
929              
930             =cut
931              
932             1;