File Coverage

blib/lib/Fsdb/Filter.pm
Criterion Covered Total %
statement 29 265 10.9
branch 0 146 0.0
condition 0 60 0.0
subroutine 10 38 26.3
pod 25 25 100.0
total 64 534 11.9


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # Filter.pm
5             # $Id: 7843a6bb9a62b736e670fcf61fdb3e66994cf79f $
6             #
7             # Copyright (C) 2007-2008 by John Heidemann
8             #
9             # This program is free software; you can redistribute it and/or
10             # modify it under the terms of the GNU General Public License
11             # version 2, as published by the Free Software Foundation.
12             #
13             # This program is distributed in the hope that it will be useful,
14             # but WITHOUT ANY WARRANTY; without even the implied warranty of
15             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16             # GNU General Public License for more details.
17             #
18             # You should have received a copy of the GNU General Public License along
19             # with this program; if not, write to the Free Software Foundation, Inc.,
20             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21             #
22              
23              
24             package Fsdb::Filter;
25              
26             =head1 NAME
27              
28             Fsdb::Filter - base class for Fsdb filters
29              
30             =head1 DESCRIPTION
31              
32             Fsdb::Filter is the virtual base class for Fsdb filters.
33              
34             Users will typically invoke individual programs via the command line
35             (for example, see L)
36             or string together several in a Perl program
37             as described in L.
38              
39             For new Filter developers, internal processing is:
40              
41             new
42             set_defaults
43             parse_options
44             autorun if desired
45             parse_options # optionally called additional times
46             setup # does IO on header
47             run # does IO on data
48             finish # any shutdown
49              
50             In addition, the C method returns metadata about a given filter.
51              
52             =head1 FUNCTIONS
53              
54             =cut
55              
56             @ISA = ();
57             ($VERSION) = 1.0;
58              
59 2     2   8379 use strict;
  2         4  
  2         42  
60 2     2   29 use 5.010;
  2         5  
61 2     2   6 use Carp qw(carp croak);
  2         3  
  2         97  
62 2     2   893 use IO::Handle;
  2         8561  
  2         69  
63 2     2   767 use IO::File;
  2         2775  
  2         188  
64 2     2   1217 use Getopt::Long qw(GetOptionsFromArray :config no_ignore_case bundling permute);
  2         14087  
  2         19  
65              
66 2     2   1074 use Fsdb::IO::Reader;
  2         4  
  2         55  
67 2     2   718 use Fsdb::IO::Writer;
  2         4  
  2         44  
68 2     2   646 use Fsdb::Support;
  2         3  
  2         61  
69 2     2   689 use Fsdb::Support::DelayPassComments;
  2         2  
  2         4148  
70              
71             =head2 new
72              
73             $fsdb = new Fsdb::Filter;
74              
75             Create a new filter object, calling set_defaults
76             and parse_options. A user program will call a specific
77             filter (say Fsdb::Filter::dbcol) to do processing.
78             See also L for aliases that remove the wordiness.
79              
80             =cut
81              
82             sub new {
83 0     0 1   my ($class) = @_;
84 0           my $self = bless {}, $class;
85 0           $self->set_defaults;
86 0           return $self;
87             }
88              
89             =head2 post_new
90              
91             $filter->post_new();
92              
93             Called when the subclass is done with new,
94             giving Fsdb::Filter a chance to autorun.
95              
96             =cut
97              
98             sub post_new {
99 0     0 1   my $self = shift @_;
100             $self->setup_run_finish()
101 0 0         if ($self->{_autorun});
102             }
103              
104              
105             =head2 set_defaults
106              
107             $filter->set_defaults();
108              
109             Set up object defaults.
110             Called once during new.
111              
112             Fsdb::Filter::set_defaults does some general setup,
113             tracking module invocation and preparing for one input and output stream.
114              
115              
116             =cut
117              
118             sub set_defaults ($) {
119 0     0 1   my $self = shift @_;
120              
121             # see the info() method for documentation
122             $self->{_info} = {
123 0           input_type => 'fsdb*',
124             input_count => 1,
125             output_type => 'fsdb*',
126             output_count => 1,
127             };
128              
129 0           my($package, $filename, $line) = caller(1);
130 0           $filename =~ s@^.*/@@g; # strip junk
131 0           $filename =~ s@\.pm$@@; # strip junk
132 0           $self->{_prog} = $filename;
133 0           $self->{_orig_argv} = undef;
134 0           $self->{_logprog} = 1;
135 0           $self->{_close} = 1;
136 0           $self->{_save_output} = undef;
137 0           $self->{_empty} = '-';
138              
139 0           $self->{_debug} = 0;
140 0           $self->{_error} = undef;
141              
142             # Fsdb::IO objects
143 0           $self->{_in} = undef;
144 0           $self->{_out} = undef;
145             # filehandles or Fsdb::BoundedQueue:
146             #
147             # These next two lines cause obnoxious warnings in some versions
148             # $self->{_input} = \*STDIN;
149             # $self->{_output} = \*STDOUT;
150             # of perl. in 5.10.1:
151             #
152             # Unbalanced string table refcount: (2) for "blib/script/dbjoin" during global destruction.
153             # Unbalanced string table refcount: (1) for "STDOUT" during global destruction.
154             # Unbalanced string table refcount: (1) for "STDIN" during global destruction.
155             # Scalars leaked: 2
156             # Unbalanced string table refcount: (2) for "blib/script/dbjoin" during global destruction.
157             # Unbalanced string table refcount: (1) for "STDOUT" during global destruction.
158             # Unbalanced string table refcount: (1) for "STDIN" during global destruction.
159             # Scalars leaked: 2
160             #
161             # fix: switch to magic variables '-' and catch them later in finish_fh_io_option.
162             #
163 0           $self->{_input} = '-';
164 0           $self->{_output} = '-';
165              
166 0           $self->{_autorun} = undef;
167             };
168              
169             =head2 set_default_tmpdir
170              
171             $filter->set_default_tmpdir
172              
173             Figure out a tmpdir, from environment variables if necessary.
174              
175             =cut
176              
177             sub set_default_tmpdir($;$) {
178 0     0 1   my $self = shift @_;
179              
180 0           foreach ($_[0], $ENV{'TMPDIR'}, "/tmp") {
181 0 0         if (defined($_)) {
182 0           $self->{_tmpdir} = $_;
183 0           return;
184             };
185             };
186 0           die "internal error in set_default_tmpdir";
187             }
188              
189             =head2 parse_options
190              
191             $filter->parse_options(@ARGV);
192              
193             Parse_options is called one I times to parse ARGV-style options.
194             It should not do any IO or any irreverssable actions; defer those to startup.
195              
196             Fsdb::Filter::parse_options does no work; the subclass is expected
197             to call Fsdb::Filter::get_options() for all arguments.
198              
199             Most modules implement certain common fsdb options, listed below.
200              
201             =for comment
202             (Note that this text in Fsdb::Filter.pm is the master copy,
203             replicated in to all the module.pm files via C.)
204              
205             =for comment
206             begin_standard_fsdb_options
207              
208             This module also supports the standard fsdb options:
209              
210             =over 4
211              
212             =item B<-d>
213              
214             Enable debugging output.
215              
216             =item B<-i> or B<--input> InputSource
217              
218             Read from InputSource, typically a file name, or C<-> for standard input,
219             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
220              
221             =item B<-o> or B<--output> OutputDestination
222              
223             Write to OutputDestination, typically a file name, or C<-> for standard output,
224             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
225              
226             =item B<--autorun> or B<--noautorun>
227              
228             By default, programs process automatically,
229             but Fsdb::Filter objects in Perl do not run until you invoke
230             the run() method.
231             The C<--(no)autorun> option controls that behavior within Perl.
232              
233             =item B<--noclose>
234              
235             By default, programs close their output when done.
236             For some cases where objects are used internally, C<--noclose>
237             may be used to leave output open for further I/O.
238             (This option is only supported by some filters.)
239              
240             =item B<--saveoutput $OUT_REF>
241              
242             By default, programs close their output when done.
243             With this option, programs in Perl can have a subprogram create
244             an output refrence and return it to the caller in C<$OUT_REF>.
245             The caller can then use it for further I/O.
246             (This option is only supported by some filters.)
247              
248             =item B<--help>
249              
250             Show help.
251              
252             =item B<--man>
253              
254             Show full manual.
255              
256             =back
257              
258             =for comment
259             end_standard_fsdb_options
260              
261              
262             =cut
263              
264             sub parse_options ($@) {
265 0     0 1   my $self = shift @_;
266 0           die "Fsdb:Filter: we expect the subprogram to handle parse_options.\n";
267             }
268              
269             =head2 parse_target_column
270              
271             $self->parse_target_column(\@argv);
272              
273             A helper function: allow one column to be specified as the C<_target_column>.
274              
275             =cut
276              
277             sub parse_target_column($$) {
278 0     0 1   my($self, $argv_ref) = @_;
279              
280 0 0 0       if ($#{$argv_ref} > 0 || $#{$argv_ref} == 0 && defined($self->{_target_column})) {
  0 0 0        
  0            
281             # xxx: next line is "die" because with perl v5.8.8,
282             # I get this error message with croak:
283             # Bizarre copy of HASH in sassign at /usr/lib/perl5/5.8.8/Carp/Heavy.pm line 45.
284 0           die $self->{_prog} . ": cannot specify multiple columns\n";
285 0           } elsif ($#{$argv_ref} == 0) {
286 0           $self->{_target_column} = $argv_ref->[0];
287             };
288             }
289              
290             =head2 get_options
291              
292             $success = $filter->get_options(\@argv, "v+" => \$verbose, ...)
293              
294             get_options is just like Getopt::Long's GetOptions,
295             but takes the argument list as the first argument.
296             This list is modified and any non-options are returned.
297             It also saves _orig_argv in itself.
298              
299             =cut
300              
301             sub get_options($$@) {
302 0     0 1   my $self = shift @_;
303 0           my $argv_ref = shift @_;
304 0           my @opt_specs = @_;
305             # hacky interface to GetOptions, we have to copy to and from ARGV
306 0           foreach my $p (@$argv_ref) {
307 0           my $p_copy = $p;
308 0 0         if (ref($p_copy) =~ /\:\:/) {
309             # remove the internal memory cruft for internal perl objects
310 0           $p_copy = ref($p_copy);
311 0           $p_copy =~ s/\=.*$//;
312 0 0         $p_copy = "::FsdbIPC" if ($p_copy =~ /^(Fsdb::BoundedQueue|IO::Pipe)/);
313 0           $p_copy = "[$p_copy]";
314             };
315 0           push (@{$self->{_orig_argv}}, $p_copy);
  0            
316             };
317 0           my $result = GetOptionsFromArray($argv_ref, @opt_specs);
318 0           return $result;
319             }
320              
321             =head2 parse_sort_option
322              
323             $fsdb_io = $filter->parse_sort_option($option_name, $target);
324              
325             This helper function handles sorting options and column names
326             as described in L. We normalize long sort options
327             to unbundled short options
328             and accumulate them in $self->{_sort_argv}.
329              
330             =cut
331              
332             sub parse_sort_option ($$$) {
333 0     0 1   my $self = shift @_;
334 0           my ($option_name, $target) = @_;
335              
336 0           my $sort_aref = $self->{_sort_argv};
337 0 0 0       if ($option_name eq '<>') {
    0 0        
    0 0        
    0 0        
    0          
338 0           push (@$sort_aref, $target);
339             } elsif ($option_name eq 'n' || $option_name eq 'numeric') {
340 0           push (@$sort_aref, '-n');
341             } elsif ($option_name eq 'N' || $option_name eq 'lexical') {
342 0           push (@$sort_aref, '-N');
343             } elsif ($option_name eq 'r' || $option_name eq 'descending') {
344 0           push (@$sort_aref, '-r');
345             } elsif ($option_name eq 'R' || $option_name eq 'descending') {
346 0           push (@$sort_aref, '-R');
347             } else {
348 0           croak "parse_sort_option: unknown option $option_name for target $target\n";
349             };
350             }
351              
352              
353              
354             =head2 parse_io_option
355              
356             $fsdb_io = $filter->parse_io_option($io_direction, $option_name, $target);
357              
358             This helper function handles C<--input> or C<--output> options,
359             without doing any setup.
360              
361             It fills in $self->{_$IO_DIRECTION} with the resulting object,
362             which is either a file handle or Fsdb::Filter::Piepline object,
363             and expects C to convert
364             this token into a full Fsdb::IO object.
365              
366             $IO_DIRECTION is usually input or output, but it can also be inputs
367             (with an "s") when multiple input sources are allowed.
368              
369             =cut
370              
371             sub parse_io_option ($$$$) {
372 0     0 1   my $self = shift @_;
373 0           my $direction = shift @_;
374 0           my $option_name = shift @_;
375             # Next line wackiness: perl-5.10's Getopt::Long passes an object, not a
376             # string, so force it to be a string.
377 0 0         $option_name = "$option_name" if (ref($option_name));
378 0           my $target = shift @_;
379 0 0         $target = $option_name if (!defined($target)); # xxx: sometimes we seem to loose an argument (bug in Getopt::Long?)
380              
381 0 0         my($mode) = $direction eq 'output' ? "w" : "r";
382              
383 0           my $token = $target;
384              
385 0 0 0       if ($direction eq 'input' || $direction eq 'output') {
    0 0        
386 0           $self->{"_$direction"} = $token;
387             } elsif ($direction eq 'inputs' || $direction eq 'outputs') {
388 0           push (@{$self->{"_$direction"}}, $token);
  0            
389             } else {
390 0           croak "internal error: bad direction";
391             };
392             }
393              
394              
395             =head2 finish_one_io_option
396              
397             $fsdb_io = $filter->finish_io_option($io_direction, $token, @fsdb_args);
398              
399             This helper function
400             finishes setting up a Fsdb::IO object in $IO_DIRECTION,
401             using $TOKEN as information.
402             using @FSDB_ARGS as parameters.
403             It creates the actual Fsdb::IO objects, opens the files (or whatever),
404             and reads the headers.
405             It returns the $FSDB_IO option.
406              
407             $IO_DIRECTION must be "input" or "output".
408              
409             Since it does IO, finish_io_option should only be called from setup,
410             not parse_options.
411              
412             Can be called once per IO stream.
413              
414             =cut
415              
416             sub finish_one_io_option ($$$@) {
417 0     0 1   my $self = shift @_;
418 0           my $direction = shift @_;
419 0           my $token = shift @_;
420              
421 0           my $fsdb;
422             # fast return a raw fh, if that's an option (-raw_fh=>1 in @fsdb_args)
423 0 0 0       if ($#_ >= 1 && $_[0] eq '-raw_fh' && $_[1]) {
      0        
424 0           return $token;
425             };
426 0 0         if (ref($token) =~ /^Fsdb::IO/) {
427             # assume the user gave us a good one
428 0           $fsdb = $token;
429             } else {
430 0           my $token_ref = ref($token);
431 0           my $token_type = undef;
432 0 0 0       if ($token_ref =~ /^Fsdb::BoundedQueue/) {
    0 0        
    0          
    0          
    0          
433 0           $token_type = '-queue';
434             } elsif ($token_ref =~ /^IO::/) {
435 0           $token_type = '-fh';
436             } elsif ($token_ref eq '' && $token eq '-') {
437 0           $token_type = '-file';
438             } elsif ($token_ref eq '' && $token =~ /^\*main/) {
439 0           $token_type = '-fh';
440             } elsif ($token_ref eq '') {
441 0           $token_type = '-file'; # assume filename
442             } else {
443 0           croak "unknown token type in Fsdb::Filter::finish_one_io_option\n";
444             };
445 0 0         if ($direction eq 'input') {
    0          
446 0           $fsdb = new Fsdb::IO::Reader($token_type => $token, @_);
447             } elsif ($direction eq 'output') {
448 0           $fsdb = new Fsdb::IO::Writer($token_type => $token, @_);
449             } else {
450 0           croak "unknown direction in Fsdb::Filter::finish_one_io_option\n"; };
451             };
452             # Next line is die, not croak, because croak always appends line number.
453 0 0         $fsdb->error and die $self->{_prog} . ": cannot open $direction: " . $fsdb->error . "\n";
454 0           return $fsdb;
455             }
456              
457             =head2 finish_io_option
458              
459             $filter->finish_io_option($io_direction, @fsdb_args);
460              
461             This helper function
462             finishes setting up a Fsdb::IO object in $IO_DIRECTION,
463             using @FSDB_ARGS as parameters.
464             It creates the actual Fsdb::IO objects, opens the files (or whatever),
465             and reads the headers.
466             the resulting Fsdb::IO objects are
467             built from C<$self->{_$IO_DIRECTION}>
468             and are left in C<$self->{_in}> or (C<_out> or C<@_ins>).
469              
470             $IO_DIRECTION must be "input", "inputs" or "output".
471              
472             Since it does IO, finish_io_option should only be called from setup,
473             not parse_options.
474              
475             Can be called once per IO stream.
476              
477             No return value.
478              
479             =cut
480              
481             sub finish_io_option ($$@) {
482 0     0 1   my $self = shift @_;
483 0           my $direction = shift @_;
484              
485             # special case multiple inputs
486 0 0         if ($direction eq 'inputs') {
487 0           my $token;
488 0           foreach $token (@{$self->{_inputs}}) {
  0            
489 0           my $in = $self->finish_one_io_option('input', $token, @_);
490 0           push @{$self->{_ins}}, $in;
  0            
491             };
492 0           return;
493             };
494              
495             # single
496 0           my $fsdb = $self->finish_one_io_option($direction, $self->{"_$direction"}, @_);
497 0 0         $self->{($direction eq 'input' ? '_in' : '_out')} = $fsdb;
498             }
499              
500             =head2 direction_to_stdio
501              
502             $fh = direction_to_stdio($direction)
503              
504             Private internal routing. Give a filehandle for STDIN or
505             STDOUT based on $DIRECTION == 'input or 'output'
506              
507             =cut
508              
509             sub direction_to_stdio($;$){ # private
510 0     0 1   my($direction, $encoding) = @_;
511 0           my $fh = new IO::Handle;
512 0 0         if ($direction eq 'input') {
    0          
513 0           $fh = $fh->fdopen(fileno(STDIN), "r");
514             } elsif ($direction eq 'output') {
515 0           $fh = $fh->fdopen(fileno(STDOUT), "w");
516             } else {
517 0           croak "bad direction";
518             };
519 0 0         $encoding = ":utf8" if (!defined($encoding));
520 0           binmode $fh, $encoding;
521 0           return $fh;
522             }
523              
524              
525             =head2 finish_fh_io_option
526              
527             $filter->finish_fh_io_option($io_direction);
528              
529             This helper function
530             creates a filehandle in $IO_DIRECTION.
531             Compare to finish_io_option which creates a Fsdb::IO object.
532             It creates the actual IO::File objects, opens the files (or whatever).
533             The filehandle is
534             built from C<$self->{_$IO_DIRECTION}>
535             and are left in C<$self->{_in}> or (C<_out>).
536              
537             $IO_DIRECTION must be "input" or "output".
538              
539             This function does no IO.
540              
541             No return value.
542              
543             =cut
544              
545             sub finish_fh_io_option ($$;$) {
546 0     0 1   my($self, $direction, $encoding) = @_;
547 0 0 0       croak "finish_fh_io_option: bad direction $direction\n"
548             if (!($direction eq 'input' || $direction eq 'output'));
549 0           my $token = $self->{"_$direction"};
550              
551 0           my $fh;
552 0 0         if (ref($token) =~ /^Fsdb::IO/) {
    0          
    0          
    0          
553 0           croak "finish_fh_io_option: expected IO::Handle but got Fsdb::IO object.\n";
554             } elsif (ref($token) =~ /^Fsdb::BoundedQueue/) {
555 0           croak "finish_fh_io_option: doesn't currently handle Fsdb::BoundedQueue objects.\n";
556             } elsif (ref($token) =~ /^IO::/) {
557             # assume we got a good one passed in
558 0           $fh = $token;
559             } elsif (ref($token) eq '') {
560             # (ref($token) == '' if $token == *main::STDIN or $token eq '-'
561 0 0         if ($token eq '-') {
562 0           $fh = direction_to_stdio($direction, $encoding);
563             } else {
564             # assume it's a glob to a filehandle
565 0           $fh = $token;
566             };
567             } else {
568 0 0         $fh = new IO::File($token, ($direction eq 'input' ? 'r' : 'w'));
569 0 0         $encoding = ":utf8" if (!defined($encoding));
570 0           binmode $fh, $encoding;
571             };
572              
573 0 0         $self->{($direction eq 'input' ? '_in' : '_out')} = $fh;
574             }
575              
576             =head2 setup
577              
578             $filter->setup();
579              
580             Do any setup that requires minimal IO
581             (for example, reading and parsing headers).
582              
583             Called exactly once.
584              
585             =cut
586              
587             sub setup ($) {
588 0     0 1   my $self = shift @_;
589 0           die "Fsdb:Filter: we expect the subprogram to handle setup.\n";
590             }
591              
592             =head2 run
593              
594             $filter->run();
595              
596             Execute the body, typically iterating over the input rows.
597              
598             Called exactly once.
599              
600             =cut
601              
602             sub run ($) {
603 0     0 1   my $self = shift @_;
604 0           die "Fsdb:Filter: we expect the subprogram to handle run.\n";
605             }
606              
607             =head2 compute_program_log
608              
609             $log = $filter->figure_program_log();
610              
611             Compute and return the log entry for a program.
612              
613             =cut
614              
615             sub compute_program_log($) {
616 0     0 1   my $self = shift @_;
617              
618 0           my $args = '';
619             # most refs were cleaned in Fsdb::Filter::get_options
620 0 0 0       if (defined($self->{_orig_argv}) && $#{$self->{_orig_argv}} != -1) {
  0            
621 0           foreach (@{$self->{_orig_argv}}) {
  0            
622 0 0         if (ref($_) eq 'CODE') {
623 0           $args .= " [ANONYMOUS-CODE]";
624             } else {
625 0           $args .= " " . Fsdb::Support::shell_quote($_);
626             };
627             };
628             };
629 0           my $log = "";
630 0           $log = " | " . $self->{_prog} . $args;
631 0           return $log;
632             }
633              
634             =head2 finish
635              
636             $filter->finish();
637              
638             Write out any trailing comments and close output.
639              
640             =cut
641              
642             sub finish($) {
643 0     0 1   my $self = shift @_;
644              
645 0 0         if (!defined($self->{_out})) {
646 0           my $problems = '';
647 0 0         $problems .= "delay_comments " if (defined($self->{_delay_comments}));
648 0 0         $problems .= "logprog " if ($self->{_logprog});
649 0 0         $problems .= "save_output " if (defined($self->{_save_output}));
650 0 0         carp "finish with no _out object and $problems\n"
651             if ($problems ne '');
652             };
653              
654 0 0         if (defined($self->{_delay_comments})) {
655 0           foreach (@{$self->{_delay_comments}}) {
  0            
656 0           $_->flush($self->{_out});
657             };
658             };
659              
660 0 0         if ($self->{_logprog}) {
661             # ick, OO programming with broken objects...
662 0 0 0       if (ref($self->{_out}) eq '' || ref($self->{_out}) =~ /^IO::/) {
663 0           $self->{_out}->print("# " . $self->compute_program_log() . "\n");
664             } else {
665 0           $self->{_out}->write_comment($self->compute_program_log());
666             };
667             };
668 0           ${$self->{_save_output}} = $self->{_out}
669 0 0         if (defined($self->{_save_output}));
670             $self->{_out}->close
671 0 0 0       if ($self->{_close} && defined($self->{_out}) && !defined($self->{_save_output}));
      0        
672             }
673              
674             =head2 setup_run_finish
675              
676             $filter->setup_run_finish();
677              
678             Shorthand for doing everything needed to run a command straightaway.
679              
680             =cut
681              
682             sub setup_run_finish ($) {
683 0     0 1   my $self = shift @_;
684 0           $self->setup();
685 0           $self->run();
686 0           $self->finish();
687             }
688              
689             =head2 info
690              
691             $filter->info($INFOTYPE)
692              
693             Return information about what the filter does.
694             Infotypes:
695              
696             =over 4
697              
698             =item input_type
699             Types of input accepted.
700             Raw types are: "fsdbtext", "fsdbobj", "fsdb*", "text", or "none".
701              
702             =item output_type
703             Type of output produced.
704             Same format as input_type.
705              
706             =item input_count
707             Number of input streams (usually 1).
708              
709             =item output_count
710             Number of input streams (usually 1).
711              
712             =back
713              
714             =cut
715              
716             sub info ($) {
717 0     0 1   my $self = shift @_;
718 0           my $key = shift @_;
719 0           return $self->{_info}{$key};
720             }
721              
722              
723             =head1 CLASS-SPECIFIC UTILITY ROUTINES
724              
725             Filter has some class-specific utility routines in it.
726             (I.e., they know about $self.)
727              
728             =head2 create_pass_comments_sub
729              
730             $filter->create_pass_comments_sub
731             or
732             $filter->create_pass_comments_sub('_VALUE');
733              
734             Creates a code block suitable for passing to C
735             C<-comment_handler>
736             that passes comments through to C<$self->{_out}>.
737             Or with the optional argument, through C<$self->{_VALUE}>.
738              
739             =cut
740              
741             sub create_pass_comments_sub ($;$)
742             {
743 0     0 1   my $self = shift @_;
744 0   0       my($value) = $_[0] // '_out';
745             # one extra level of indirection to allow for delayed opening of _out
746 0     0     return sub { $self->{$value}->write_raw(@_); };
  0            
747             }
748              
749             =head2 create_tolerant_pass_comments_sub
750              
751             $filter->create_tolerant_pass_comments_sub
752             or
753             $filter->create_tolerant_pass_comments_sub('_VALUE');
754              
755             Like C<$self->create_pass_comments_sub>,
756             but this version tolerates the output not being opened.
757             In those cases, comments are discarded.
758             I use carefully to guarantee consistent results.
759              
760             A symptom requiring tolerance is to get an error like
761             "Can't call method "write_raw" on an undefined value at /usr/lib/perl5/vendor_perl/5.10.0/Fsdb/Filter.pm line 678."
762             (which will be the sub create_pass_comments_sub ($;$) line in create_pass_comments.)
763              
764              
765             =cut
766              
767             sub create_tolerant_pass_comments_sub ($;$)
768             {
769 0     0 1   my $self = shift @_;
770 0   0       my($value) = $_[0] // '_out';
771             # print STDERR "## create_tolerant_pass_comments_sub on $value, " . ref($self->{$value}) . "\n";
772             # one extra level of indirection to allow for delayed opening of _out
773             return sub {
774             $self->{$value}->write_raw(@_)
775 0 0   0     if (defined($self->{$value}));
776 0           };
777             }
778              
779             =head2 create_delay_comments_sub
780              
781             $filter->create_delay_comments_sub($optional_value);
782              
783             Creates a code block suitable for passing to Fsdb::IO::Readers -comment_handler
784             that will buffer comments for automatic (from $self->final) after all other IO.
785             No output occurs until finish() is called,
786             at which time C<$self-E{_out}> must be a live Fsdb object.
787              
788             =cut
789              
790             sub create_delay_comments_sub ($;$) {
791 0     0 1   my $self = shift @_;
792 0   0       my($value) = $_[0] // '_out';
793 0           my $dpc = new Fsdb::Support::DelayPassComments(\$self->{$value});
794 0           push (@{$self->{_delay_comments}}, $dpc);
  0            
795 0     0     return sub { $dpc->enqueue(@_); };
  0            
796             }
797              
798              
799             =head2 create_compare_code
800              
801             $filter->create_compare_code($a_fsdb, $b_fsdb, $a_fref_name, $b_fref_name).
802              
803             Write compare code based on sort-style options
804             stored in C<$self->{_sort_argv}>.
805             C<$A_FSDB> and C<$B_FSDB> are the L object that defines the schemas
806             for the two objects.
807             We assume the variables C<$a> and C<$b> point to arefs;
808             these names can be overridden by specifying
809             C<$A_FREF_NAME> and C<$B_FREF_NAME>.
810              
811             Returns undef if there are no fields in C<$self->{_sort_argv}>.
812             =cut
813              
814             sub create_compare_code ($$;$$) {
815 0     0 1   my($self, $a_fsdb, $b_fsdb, $a_name, $b_name) = @_;
816 0 0         $a_name = 'a' if (!defined($a_name));
817 0 0         $b_name = 'b' if (!defined($b_name));
818              
819             #
820             # A word about the 'no warnings "numeric"' bit:
821             # we want to compare numeric data with <=>,
822             # but that emits warnings for our empty value "-".
823             # We COULD filter that in Perl, but all the checking would make
824             # it much, much slower, and the Perl core has to check anyway.
825             # It turns out, <=> does The Right Thing,
826             # in that (any non-numeric) == (any non-numeric)
827             # and (any non-numeric) < (any numeric).
828             # So we just turn off warnings.
829             # But. Just. Here.
830             #
831 0           my $compare_code = "sub {\n" .
832             "\tno warnings \"numeric\";\n" .
833             "\t\treturn\n";
834 0           my ($reverse, $numeric) = (0, 0);
835 0           my $arg;
836 0           my $fields_found = 0;
837 0           foreach $arg (@{$self->{_sort_argv}}) {
  0            
838 0 0         if ($arg eq '-r') {
    0          
    0          
    0          
    0          
839 0           $reverse = 1;
840             } elsif ($arg eq '-R') {
841 0           $reverse = 0;
842             } elsif ($arg eq '-n') {
843 0           $numeric = 1;
844             } elsif ($arg eq '-N') {
845 0           $numeric = 0;
846             } elsif ($arg =~ /^-/) {
847 0           croak $self->{_prog} . ": internal error: unknown option $arg in sort key\n";
848             } else {
849 0 0         my ($left) = ($reverse ? $b_name : $a_name);
850 0 0         my ($right) = ($reverse ? $a_name : $b_name);
851 0           my $left_coli = $a_fsdb->col_to_i($arg);
852 0           my $right_coli = $b_fsdb->col_to_i($arg);
853 0 0         if ($reverse) {
854 0           my $tmp_coli = $left_coli;
855 0           $left_coli = $right_coli;
856 0           $right_coli = $tmp_coli;
857             };
858 0 0 0       croak $self->{_prog} . ": unknown column name $arg in sort key\n"
859             if (!defined($left_coli) || !defined($right_coli));
860 0 0         $compare_code .= "\t" . '($' . $left . '->[' . $left_coli . '] ' .
    0          
    0          
861             ($numeric ? "<=>" : "cmp") .
862             ' $' . $right . '->[' . $right_coli . ']) || ' .
863             ' # ' . $arg .
864             ($reverse ? ", descending" : ", ascending") .
865             ($numeric ? " numeric" : " lexical") .
866             "\n";
867             # note that we don't currently handle NaN comparisons returning undef
868 0           $fields_found++;
869             };
870             };
871 0           $compare_code .= "\t0; # match\n};\n";
872 0 0         return undef if ($fields_found == 0);
873 0           return $compare_code;
874             }
875              
876             =head2 numeric_formatting
877              
878             $out = $self->numeric_formatting($x)
879              
880             Display a floating point number $x using $self->{_format},
881             handling possible non-numeric "-" as a special case.
882              
883             =cut
884              
885             sub numeric_formatting {
886 0     0 1   my ($self, $x) = @_;
887 0 0         return $x if ($x eq '-');
888 0           return sprintf($self->{_format}, $x);
889             }
890              
891             =head2 setup_exactly_two_inputs
892              
893             $self->setup_exactly_two_inputs
894              
895             Ensure that there are exactly two input streams.
896             Common to L and L.
897              
898             =cut
899              
900             sub setup_exactly_two_inputs {
901 0     0 1   my($self) = @_;
902 0 0         if ($#{$self->{_inputs}} == -1) {
  0            
903 0           croak $self->{_prog} . ": too few input sources specified, use --input.\n";
904             };
905 0 0         if ($#{$self->{_inputs}} > 1) {
  0            
906 0           croak $self->{_prog} . ": too input sources specified, dbmerge only hanldes two at once.\n";
907             };
908 0 0         if ($#{$self->{_inputs}} == 0) {
  0            
909             # need to use stdin?
910             # my $token = new IO::Handle;
911             # $token->fdopen(fileno(STDIN), "r");
912             # unshift @{$self->{_inputs}}, $token;
913 0           unshift @{$self->{_inputs}}, '-';
  0            
914             };
915 0 0         croak if ($#{$self->{_inputs}} != 1); # assert
  0            
916             }
917              
918              
919             =head1 NON-CLASS UTILITY ROUTINES
920              
921             Filter also has some utility routines that are not part of the class structure.
922             They are not exported.
923              
924             (none currently)
925              
926             =cut
927              
928             1;