File Coverage

blib/lib/Fsdb/Filter/dbcolstats.pm
Criterion Covered Total %
statement 27 209 12.9
branch 0 128 0.0
condition 0 36 0.0
subroutine 9 21 42.8
pod 6 6 100.0
total 42 400 10.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # dbcolstats.pm
5             # Copyright (C) 1991-2015 by John Heidemann
6             # $Id: b8f85fa383507a09ebfc72e644fadd6e1d5ceed0 $
7             #
8             # This program is distributed under terms of the GNU general
9             # public license, version 2. See the file COPYING
10             # in $dblibdir for details.
11             #
12              
13             package Fsdb::Filter::dbcolstats;
14              
15             =head1 NAME
16              
17             dbcolstats - compute statistics on a fsdb column
18              
19             =head1 SYNOPSIS
20              
21             dbcolstats [-amS] [-c ConfidenceFraction] [-q NumberOfQuantiles] column
22              
23             =head1 DESCRIPTION
24              
25             Compute statistics over a COLUMN of data.
26             Records containing non-numeric data are considered null
27             and do not contribute to the stats (with the C<-a> option
28             they are treated as zeros).
29              
30             Confidence intervals are a t-test (+/- (t_{a/2})*s/sqrt(n))
31             and assume the population takes a normal distribution
32             with a small number of samples (< 100).
33              
34             By default,
35             all statistics are computed as for a population I<sample> (with an ``n-1'' term),
36             not as representing the whole population (using ``n'').
37             Select between them with B<--sample> or B<--nosample>.
38             When you measure the entire population, use the latter option.
39              
40             The output of this program is probably best looked at after
41             reformatting with L<dbcolneaten(1)>.
42              
43             Dbcolstats runs in O(1) memory. Median or quantile requires sorting the
44             data and invokes dbsort. Sorting will run in constant RAM but
45             O(number of records) disk space. If median or quantile is required
46             and the data is already sorted, dbcolstats will run more efficiently with
47             the -S option.
48              
49              
50             =head1 OPTIONS
51              
52             =over 4
53              
54             =item B<-a> or B<--include-non-numeric>
55              
56             Compute stats over all records (treat non-numeric records
57             as zero rather than just ignoring them).
58              
59             =item B<-c FRACTION> or B<--confidence FRACTION>
60              
61             Specify FRACTION for the confidence interval.
62             Defaults to 0.95 for a 95% confidence factor.
63              
64             =item B<-f FORMAT> or B<--format FORMAT>
65              
66             Specify a L<printf(3)>-style format for output statistics.
67             Defaults to C<%.5g>.
68              
69             =item B<-m> or B<--median>
70              
71             Compute median value. (Will sort data if necessary.)
72             (Median is the quantile for N=2.)
73              
74             =item B<-q N> or B<--quantile N>
75              
76             Compute quantile (quartile when N is 4),
77             or an arbitrary quantile for other values of N,
78             where the scores reported are those 1/Nth of the way across the population.
79              
80             =item B<--sample>
81              
82             Compute I<sample> population statistics
83             (e.g., the sample standard deviation),
84             assuming I<n-1> degrees of freedom.
85              
86             =item B<--nosample>
87              
88             Compute I<full> population statistics
89             (e.g., the population standard deviation).
90              
91             =item B<-S> or B<--pre-sorted>
92              
93             Assume data is already sorted.
94             With one -S, we check and confirm this precondition.
95             When repeated, we skip the check.
96             (This flag is ignored if quartiles are not requested.)
97              
98             =item B<--parallelism=N> or C<-j N>
99              
100             Allow sorting to happen in parallel.
101             Defaults on.
102             (Only relevant if using non-pre-sorted data with quantiles.)
103              
104             =item B<-F> or B<--fs> or B<--fieldseparator> S
105              
106             Specify the field (column) separator as C<S>.
107             See L<dbfilealter> for valid field separators.
108              
109             =item B<-T TmpDir>
110              
111             where to put temporary data.
112             Only used if median or quantiles are requested.
113             Also uses environment variable TMPDIR, if -T is
114             not specified.
115             Default is /tmp.
116              
117             =item B<-k KeyField>
118              
119             Do multi-stats, grouped by each key.
120             Assumes keys are sorted. (Use dbmultistats to guarantee sorting order.)
121              
122             =item B<--output-on-no-input>
123              
124             Enables null output (all fields are "-", n is 0)
125             if we get input with a schema but no records.
126             Without this option, just output the schema but no rows.
127             Default: no output if no input.
128              
129             =back
130              
131              
132             =for comment
133             begin_standard_fsdb_options
134              
135             This module also supports the standard fsdb options:
136              
137             =over 4
138              
139             =item B<-d>
140              
141             Enable debugging output.
142              
143             =item B<-i> or B<--input> InputSource
144              
145             Read from InputSource, typically a file name, or C<-> for standard input,
146             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
147              
148             =item B<-o> or B<--output> OutputDestination
149              
150             Write to OutputDestination, typically a file name, or C<-> for standard output,
151             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
152              
153             =item B<--autorun> or B<--noautorun>
154              
155             By default, programs process automatically,
156             but Fsdb::Filter objects in Perl do not run until you invoke
157             the run() method.
158             The C<--(no)autorun> option controls that behavior within Perl.
159              
160             =item B<--help>
161              
162             Show help.
163              
164             =item B<--man>
165              
166             Show full manual.
167              
168             =back
169              
170             =for comment
171             end_standard_fsdb_options
172              
173              
174             =head1 SAMPLE USAGE
175              
176             =head2 Input:
177              
178             #fsdb absdiff
179             0
180             0.046953
181             0.072074
182             0.075413
183             0.094088
184             0.096602
185             # | /home/johnh/BIN/DB/dbrow
186             # | /home/johnh/BIN/DB/dbcol event clock
187             # | dbrowdiff clock
188             # | /home/johnh/BIN/DB/dbcol absdiff
189              
190             =head2 Command:
191              
192             cat data.fsdb | dbcolstats absdiff
193              
194             =head2 Output:
195              
196             #fsdb mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n
197             0.064188 0.036194 56.387 0.037989 0.026199 0.10218 0.95 0.38513 0.031271 0 0.096602 6
198             # | /home/johnh/BIN/DB/dbrow
199             # | /home/johnh/BIN/DB/dbcol event clock
200             # | dbrowdiff clock
201             # | /home/johnh/BIN/DB/dbcol absdiff
202             # | dbcolstats absdiff
203             # 0.95 confidence intervals assume normal distribution and small n.
204              
205             =head1 SEE ALSO
206              
207             L<dbmultistats(1)>, handles multiple experiments in a single file.
208              
209             L<dbcolneaten(1)>, to pretty-print the output of dbcolstats.
210              
211             L<dbcolpercentile(1)>, to compute an even more general version of median/quantiles.
212              
213             L<dbcolstatscores(1)>, to compute z-scores or t-scores for each row
214              
215             L<dbrvstatdiff(1)>, to see if two sample populations are statistically different.
216              
217             L<Fsdb(3)>.
218              
219             =head1 BUGS
220              
221             The algorithms used to compute variance have not been
222             audited to check for numerical stability.
223             (See F<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>.)
224             Variance may be incorrect when standard deviation
225             is small relative to the mean.
226              
227             The field C<conf_pct> implies percentage, but it's actually
228             reported as a fraction (0.95 means 95%).
229              
230             Because of limits of floating point, statistics on numbers of
231             widely different scales may be incorrect.
232             See the test cases F for examples.
233              
234              
235             =head1 CLASS FUNCTIONS
236              
237             =cut
238              
239             @ISA = qw(Fsdb::Filter);
240             ($VERSION) = 2.0;
241              
242 1     1   6 use strict;
  1         2  
  1         26  
243 1     1   5 use Pod::Usage;
  1         2  
  1         72  
244              
245 1     1   6 use Fsdb::IO::Reader;
  1         1  
  1         17  
246 1     1   4 use Fsdb::IO::Writer;
  1         2  
  1         14  
247 1     1   4 use Fsdb::Filter;
  1         1  
  1         22  
248 1     1   5 use Fsdb::Filter::dbpipeline qw(dbpipeline_sink dbsort);
  1         2  
  1         39  
249 1     1   4 use Fsdb::Support qw($is_numeric_regexp);
  1         2  
  1         69  
250 1     1   5 use Fsdb::Support::TDistribution qw(t_distribution);
  1         2  
  1         42  
251 1     1   5 use Fsdb::Support::NamedTmpfile;
  1         2  
  1         1700  
252              
253              
254             =head2 new
255              
256             $filter = new Fsdb::Filter::dbcolstats(@arguments);
257              
258             Create a new dbcolstats object, taking command-line arguments.
259              
260             =cut
261              
262             sub new($@) {
                # Constructor.  Builds the object through the parent class's
                # new(), re-blesses it into $class, then installs this filter's
                # defaults, parses the caller's arguments over them, and runs
                # the parent's post_new() hook before returning the object.
263 0     0 1   my $class = shift @_;
264 0           my $self = $class->SUPER::new(@_);
265 0           bless $self, $class;
                # Defaults must be in place before option parsing so that
                # parse_options only overrides what the caller specified.
266 0           $self->set_defaults;
267 0           $self->parse_options(@_);
268 0           $self->SUPER::post_new();
269 0           return $self;
270             }
271              
272              
273             =head2 set_defaults
274              
275             $filter->set_defaults();
276              
277             Internal: set up defaults.
278              
279             =cut
280              
281             sub set_defaults($) {
                # Initialize every option slot this filter reads, after letting
                # the parent class set its own defaults.
282 0     0 1   my($self) = @_;
283 0           $self->SUPER::set_defaults();
284 0           $self->{_target_column} = undef;
                # 0.95 => 95% confidence interval.
285 0           $self->{_confidence_fraction} = 0.95;
286 0           $self->{_format} = "%.5g";
287 0           $self->{_quantile} = undef;
288 0           $self->{_median} = undef; # special case: renames the output field
                # True => sample statistics (run_one_key divides by n-1 rather than n).
289 0           $self->{_sample} = 1;
                # Counter, not a boolean: parse_options increments it once per -S.
290 0           $self->{_pre_sorted} = 0;
291 0           $self->{_include_non_numeric} = undef;
292 0           $self->{_fscode} = undef;
293 0           $self->{_max_parallelism} = undef;
294 0           $self->{_key_column} = undef;
295 0           $self->{_output_on_no_input} = undef;
                # set_default_tmpdir is not defined in this file; presumably
                # inherited from Fsdb::Filter -- TODO confirm.
296 0           $self->set_default_tmpdir;
297             }
298              
299             =head2 parse_options
300              
301             $filter->parse_options(@ARGV);
302              
303             Internal: parse command-line arguments.
304              
305             =cut
306              
307             sub parse_options($@) {
                # Parse command-line style arguments into the object's option
                # slots.  Works on a private copy of the argument list; whatever
                # get_options leaves in that copy is handed to
                # parse_target_column.  Any option-parsing failure exits through
                # pod2usage(2).
308 0     0 1   my $self = shift @_;
309              
310 0           my(@argv) = @_;
311             $self->get_options(
312             \@argv,
313 0     0     'help|?' => sub { pod2usage(1); },
314 0     0     'man' => sub { pod2usage(-verbose => 2); },
315             'a|include-non-numeric!' => \$self->{_include_non_numeric},
316             'autorun!' => \$self->{_autorun},
317             'close!' => \$self->{_close},
318             'c|confidence=f' => \$self->{_confidence_fraction},
319             'd|debug+' => \$self->{_debug},
320             'f|format=s' => \$self->{_format},
321             'F|fs|cs|fieldseparator|columnseparator=s' => \$self->{_fscode},
322 0     0     'i|input=s' => sub { $self->parse_io_option('input', @_); },
323             'j|parallelism=i' => \$self->{_max_parallelism},
324             'k|key=s' => \$self->{_key_column},
325             'log!' => \$self->{_logprog},
326             'm|median!' => \$self->{_median},
327 0     0     'o|output=s' => sub { $self->parse_io_option('output', @_); },
328             'output-on-no-input!' => \$self->{_output_on_no_input},
329             'q|quantile=i' => \$self->{_quantile},
330             's|sample!' => \$self->{_sample},
331             'S|pre-sorted+' => \$self->{_pre_sorted},
332             'T|tmpdir|tempdir=s' => \$self->{_tmpdir},
333             'saveoutput=s' => \$self->{_save_output},
334 0 0         ) or pod2usage(2);
                # Remaining (non-option) arguments name the column to analyze.
335 0           $self->parse_target_column(\@argv);
336             }
337              
338             =head2 setup
339              
340             $filter->setup();
341              
342             Internal: setup, parse headers.
343              
344             =cut
345              
346              
347             sub setup($) {
                # Open input and output, resolve the target (and optional key)
                # column to indexes, build the output header list, and -- when a
                # median or quantile was requested -- arrange a temporary file
                # that will receive the numeric data in sorted order.
                #
                # NOTE(review): croak is called below, but no `use Carp` appears
                # among this file's visible imports; confirm that croak is
                # actually in scope (e.g. provided via Fsdb::Filter).
348 0     0 1   my($self) = @_;
349              
                # A target column is mandatory.
350 0 0         pod2usage(2) if (!defined($self->{_target_column}));
351              
352 0 0         print STDERR "dbcolstats: pre-input setup\n" if ($self->{_debug} > 2);
353 0           $self->finish_io_option('input', -comment_handler => $self->create_delay_comments_sub);
354 0 0         print STDERR "dbcolstats: post-input setup\n" if ($self->{_debug} > 2);
355 0           $self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
356             croak $self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n"
357 0 0         if (!defined($self->{_target_coli}));
358 0           $self->{_key_coli} = undef;
359 0 0         if (defined($self->{_key_column})) {
360 0           $self->{_key_coli} = $self->{_in}->col_to_i($self->{_key_column});
361             croak($self->{_prog} . ": key column " . $self->{_key_column} . " is not in input stream.\n")
362 0 0         if (!defined($self->{_key_coli}));
363             };
                # Cache the reader's fastpath sub; run_one_key calls it once per row.
364 0           my $read_fastpath_sub = $self->{_in}->fastpath_sub();
365 0           $self->{_read_fastpath_sub} = $read_fastpath_sub;
366              
                # Output schema: the fixed statistics, then "median" and/or
                # q1..q(N-1) if requested, with the key column (if any) first.
367 0           my(@headers) = (qw(mean stddev pct_rsd conf_range conf_low conf_high
368             conf_pct sum sum_squared min max n));
369 0 0         push(@headers, "median") if ($self->{_median});
370 0 0         if ($self->{_quantile}) {
371 0           foreach (1..($self->{_quantile}-1)) {
372 0           push(@headers, "q$_");
373             };
374             };
375 0 0         unshift(@headers, $self->{_key_column}) if (defined($self->{_key_column}));
376 0 0         print STDERR "dbcolstats: pre-output setup\n" if ($self->{_debug} > 2);
377 0           my @output_options = (-cols => \@headers);
378             unshift (@output_options, -fscode => $self->{_fscode})
379 0 0         if (defined($self->{_fscode}));
380 0           $self->finish_io_option('output', @output_options);
381 0 0         print STDERR "dbcolstats: post-output setup\n" if ($self->{_debug} > 2);
382              
                # Median/quantile need a second pass over the data in sorted
                # order, so the values are saved to a temporary single-column
                # ("data") file.  Note this check runs after the output has
                # already been opened above.
383 0 0 0       if ($self->{_quantile} || $self->{_median}) {
384             croak($self->{_prog} . ": cannot currently do median or quantile with a key column\n")
385 0 0         if (defined($self->{_key_column}));
386 0           $self->{_save_out_filename} = Fsdb::Support::NamedTmpfile::alloc($self->{_tmpdir});
387             # sorting needed?
388 0           my $save_out;
389 0           my(@writer_args) = (-cols => [qw(data)]);
390 0 0         print STDERR "dbcolstats: pre-saveoutput setup\n" if ($self->{_debug} > 2);
391 0 0         if (!$self->{_pre_sorted}) {
                # Not pre-sorted: write through a dbsort -n pipeline whose
                # output is the temporary file; the returned handle is kept in
                # _sorter_fred so run_one_key can join() it.
392 0           my $sorter_fred;
393 0           my(@dbsort_args) = qw(-n data);
394             push (@dbsort_args, '--parallelism', $self->{_max_parallelism})
395 0 0         if (defined($self->{_max_parallelism}));
396 0 0         print STDERR "dbcolstats: doing sorter thread\n" if ($self->{_debug} > 2);
397             ($save_out, $sorter_fred) = dbpipeline_sink(\@writer_args,
398             '--output' => $self->{_save_out_filename},
399 0           dbsort(@dbsort_args));
400 0           $self->{_sorter_fred} = $sorter_fred;
401             } else {
402             # no, just write it ourselves
403 0           $save_out = new Fsdb::IO::Writer('-file' => $self->{_save_out_filename}, @writer_args);
404             };
405 0           $self->{_save_out} = $save_out;
406 0 0         print STDERR "dbcolstats: post-saveoutput setup\n" if ($self->{_debug} > 2);
407             } else {
408 0 0         print STDERR "dbcolstats: no saveoutput needed\n" if ($self->{_debug} > 2);
409 0           $self->{_save_out} = undef;
410             };
411             }
412              
413             =head2 _round_up
414              
415             $i = _round_up($x);
416              
417             Internal: Round up to the next integer.
418              
419             =cut
420              
421             sub _round_up($) {
                # Return the smallest integer >= $x.  int() truncates toward
                # zero, so only a positive fractional part needs the +1; a
                # negative non-integer is already rounded up by int() itself.
422 0     0     my($x) = @_;
423 0           my($xi) = int($x);
424 0 0         return ($x > $xi) ? $xi+1 : $xi;
425             }
426              
427             =head2 _compute_quantile
428              
429             ($median, $quantile_aref) = $self->_compute_quantile($n, $mean);
430              
431             Internal: Compute quantile from the saved data.
432             Not generalizable.
433             We assume the saved output is closed before we enter.
434              
435             =cut
436              
437             sub _compute_quantile($$$) {
                # Compute the median and the quantile boundaries by re-reading
                # the saved (sorted) data file.
                #
                # Arguments: $self, the count $n of values saved, and their $mean.
                # Returns:   an empty list when neither median nor quantile was
                #            requested; otherwise ($median, \@q), where @q is
                #            1-based (element 0 is a placeholder) and $q[$k] is
                #            the k-th quantile boundary.
                # Dies if the saved file cannot be re-read or if its values are
                # not in ascending numeric order.
438 0     0     my ($self, $n, $mean) = @_;
439              
440 0 0 0       return if (!($self->{_quantile} || $self->{_median}));
                # Median alone is treated as the 2-quantile.
441 0           my $effective_quantile = $self->{_quantile};
442 0 0         $effective_quantile = 2 if (!defined($effective_quantile));
443              
444 0           my $median;
445             my @q;
                # Zero or one value: every statistic degenerates to the mean,
                # and the saved file is not read at all.
446 0 0         if ($n <= 1) {
447 0           $median = $mean;
448 0           push(@q, ($mean) x $effective_quantile);
449 0           return ($median, \@q);
450             };
451              
452 0           my $save_in = new Fsdb::IO::Reader(-file => $self->{_save_out_filename});
453 0 0         $save_in->error && die $self->{_prog} . ": re-read error " . $save_in->error;
454              
455             # To handle the ugly case of having more ntiles than
456             # data, we detect it and replicate the data until we have more
457             # replicated_data than ntiles.
458 0 0         my($replicate_data) = ($n >= $effective_quantile+1) ? 1 : _round_up(($effective_quantile+1.0)/$n);
459 0           my($replicated_n) = $n * $replicate_data;
460              
461             # Also note that the array of quantiles and the number of
462             # data elements read are both 1-based and not 0-based like
463             # most perl stuff. This is to make the math easier.
464 0           my $median_i = _round_up($replicated_n / 2);
465 0           my $ntile_frac = ($replicated_n + 0.0) / ($effective_quantile + 0.0);
466 0           my($x, $last_x, $next_q_i);
467 0           @q = (0); # note that q is primed with 0 (to fill that zero element)
468 0           my($replicates_left) = 0;
469 0           my($i); # note that i counts from 1!
                # Walk the (virtually replicated) data until all quantile
                # boundaries have been collected.
470 0           for ($i = 1; $#q+1 < $effective_quantile; $i++) {
                # Fetch a new row only once the current value has been used
                # $replicate_data times.
471 0 0         if (--$replicates_left <= 0) {
472 0           my $fref = $save_in->read_rowobj;
473 0 0         die "internal error re-reading data\n" if (ref($fref) ne 'ARRAY');
474 0           $x = $fref->[0];
475 0           $replicates_left = $replicate_data;
476             # Verify sorted order (in case the user lied to us
477             # about pre-sorting).
478 0 0 0       if (defined($last_x) && $x < $last_x) {
                # _pre_sorted true means the user passed -S and we did not
                # sort, so disorder is the user's; otherwise the data came
                # through our own dbsort and disorder is an internal error.
479 0 0         my($info) = ($self->{_pre_sorted} ? " (user specified -S for pre-sorted data but it is unsorted)" : " (internal error in dbsort)");
480 0           die $self->{_prog} . ": cannot process data that is out of order between $last_x and $x $info.\n";
481             };
482 0           $last_x = $x;
483             };
484 0 0         if ($i == $median_i) { $median = $x; };
  0            
                # Index at which the next boundary falls; recomputed lazily after
                # each boundary is recorded.
485 0 0         $next_q_i = (_round_up($ntile_frac * ($#q + 1.0) )) if (!defined($next_q_i));
486             # print "d: q=$#q nq=$next_q_i i=$i\n";
487 0 0         if ($i == $next_q_i) { push(@q, $x); $next_q_i = undef; };
  0            
  0            
488             };
489 0           return ($median, \@q);
490             };
491              
492              
493             =head2 run_one_key
494              
495             $filter->run_one_key();
496              
497             Internal: run over each row, for a given key.
498              
499             =cut
500             sub run_one_key($) {
                # Consume input rows for one key group (or for the whole input
                # when no key column is set), accumulate n / sum / sum of squares
                # / min / max, then compute the statistics and write one output
                # row.
                #
                # When a key column is set and a row with a different key is
                # read, that row's key and value are parked in
                # $self->{_holdover_key} / $self->{_holdover_data} for the next
                # call; run() keeps calling until no holdover key is left.
501 0     0 1   my($self) = @_;
502              
503 0 0         print STDERR "dbcolstats: starting run\n" if ($self->{_debug} > 2);
504              
505             # xxx: should eval all this to factor out constants from runtime
506 0           my($xf) = $self->{_target_coli};
507 0           my($key_column) = $self->{_key_column};
508              
509 0           my($n) = 0;
510 0           my($sx) = 0;
511 0           my($sxx) = 0;
512 0           my $min;
513             my $max;
514 0           my $key;
                # Pick up the row parked by the previous call (if any), then
                # clear the parking slots.
515 0           my $last_key = $self->{_holdover_key};
516 0           my $holdover_data = $self->{_holdover_data};
517 0           $self->{_holdover_key} = $self->{_holdover_data} = undef;
518              
519 0           my $fref;
520             my $x;
521              
522             {
523 0           my $save_out = $self->{_save_out};
  0            
524 0           my $read_fastpath_sub = $self->{_read_fastpath_sub};
525              
                # The per-row loop is assembled as Perl source text and run with
                # a string eval, so that only the steps the current options need
                # end up in the loop.  The generated code refers to the lexicals
                # declared above ($n, $sx, $sxx, $min, $max, $key, $last_key,
                # $holdover_data, $fref, $x, $save_out, $read_fastpath_sub) and
                # to $self.
526 0           my $code = q'
527             while (1) {
528             if (defined($holdover_data)) {
529             $x = $holdover_data; # and key was set earlier
530             $holdover_data = undef;
531             } else {
532             $fref = &{$read_fastpath_sub}();
533             last if (!defined($fref));
534             $x = $fref->[' . $xf . q'];
535             ';
                # Key handling is generated only when a key column was given.
536 0 0         if (defined($self->{_key_column})) {
537             $code .= q'
538 0           $key = $fref->[' . $self->{_key_coli} . '];
539             if (!defined($last_key)) {
540             $last_key = $key;
541             } elsif ($key ne $last_key) {
542             $self->{_holdover_key} = $key;
543             $self->{_holdover_data} = $x;
544             last;
545             };
546             ';
547             };
548 0           $code .= q'
549             };
550             ';
551              
                # Unless -a was given, rows whose value is not numeric are skipped.
552             $code .= 'next if ($x !~ /' . $is_numeric_regexp . "/);\n"
553 0 0         if (!$self->{_include_non_numeric});
554 0           $code .= q'
555             $x += 0.0; # force numeric
556             $n++;
557             $sx += $x;
558             $sxx += $x * $x;
559             ';
560 0 0         $code .= 'print STDERR "dbcolstats: save-out write\n";' . "\n" if ($self->{_debug} > 2);
561              
                # For median/quantile, each accepted value is also written to the
                # temporary file prepared by setup().
562 0 0 0       if ($self->{_quantile} || $self->{_median}) {
563             # note that as of perl-5.14 we must force numeric or perl truncates floats to ints :-(
564 0           $code .= q'
565             my(@row);
566             $row[0] = $x + 0; # force numeric, as guaranteed by above
567             $save_out->write_rowobj(\@row);
568             ';
569             };
570 0 0         $code .= 'print STDERR "dbcolstats: post save-out write\n";' . "\n" if ($self->{_debug} > 2);
571 0           $code .= q'
572             if (!defined($min)) {
573             $min = $max = $x;
574             } else {
575             $min = $x if ($x < $min);
576             $max = $x if ($x > $max);
577             };
578             };';
579              
580             # run it
581 0 0         print STDERR "dbcolstats: eval'ing code\n" if ($self->{_debug});
582 0 0         print $code if ($self->{_debug});
583 0           eval $code;
584 0 0         $@ and die $self->{_prog} . ": internal error in eval.: $@\n";
585              
586             # clean up
587 0 0 0       if ($self->{_quantile} || $self->{_median}) {
588 0 0         print STDERR "dbcolstats: closing save-out\n" if ($self->{_debug} > 2);
589 0           $self->{_save_out}->close;
590 0 0         print STDERR "dbcolstats: post closing save-out\n" if ($self->{_debug} > 2);
591             };
592             }
593              
594             #
595             # Make sure we cleaned up before we do any computation.
596             #
597 0 0         if (defined($self->{_sorter_fred})) {
598             # let sorting finish
599 0 0         print STDERR "dbcolstats: join on sorter thread\n" if ($self->{_debug} > 2);
600 0           $self->{_sorter_fred}->join();
601 0           $self->{_sorter_fred} = undef;
602 0 0         print STDERR "dbcolstats: post join on sorter thread\n" if ($self->{_debug} > 2);
603             };
604              
605             #
606             # Compute stats.
607             #
                # With no data, statistics are reported as the string "-".
608 0 0         my $mean = ($n == 0 ? "-" : $sx / $n);
609             # stddev = s, not s^2, approximates sigma
610             # Check for special cases:
611             # $n <= 1 => divide by zero
612             # all same data value => can sometimes get very small or negative
613             # stddev (due to rounding error)
614             # for these cases, $stddev = 0
615 0           my $stddev;
616 0 0         if ($n == 0) {
617 0           $stddev = "-";
618             } else {
                # sqrt((sum_sq - n*mean^2) / d), with d = n-1 for sample
                # statistics and d = n otherwise.
619             $stddev = ($n <= 1 || $max == $min) ? 0 :
620 0 0 0       sqrt(($sxx - $n * $mean * $mean) / ($n - ($self->{_sample} ? 1 : 0)));
    0          
621             };
                # Relative standard deviation in percent; undefined ("-") when
                # there is no data or the mean is zero.
622 0           my $pct_rsd;
623 0 0 0       if ($stddev eq '-' || $mean eq '-' || $mean == 0) {
      0        
624 0           $pct_rsd = "-";
625             } else {
626 0           $pct_rsd = ($stddev / $mean) * 100;
627             };
628             #
629             # Confidence intervals from "Probability and Statistics for Engineers",
630             # Second Edition, 1986, Scheaffer and McClave, p. 242.
631             #
                # Half-width = t(n-1, alpha) * stddev / sqrt(n), where
                # alpha = (1 - confidence fraction) / 2; "-" when n <= 1.
632 0           my $conf_half;
633 0 0         if ($n <= 1) {
634 0           $conf_half = "-";
635             } else {
636 0           my $conf_alpha = (1.0 - $self->{_confidence_fraction}) / 2.0;
637 0           $conf_half = t_distribution($n - 1, $conf_alpha) * $stddev / sqrt($n);
638             };
639 0 0         my $conf_low = ($conf_half eq '-' ? '-' : $mean - $conf_half);
640 0 0         my $conf_high = ($conf_half eq '-' ? '-' : $mean + $conf_half);
641              
642             #
643             # Compute median/quantile.
644             #
645 0           my($median, $q_aref) = $self->_compute_quantile($n, $mean);
646              
647             #
648             # Output the results.
649             #
650             # xxx: bug work-around: the +0s on conf_pct, min, max are
651             # because perl-5.14.2-191.fc16.x86_64
652             # truncates the floating-point portion of these values otherwise.
653             #
                # min, max, conf_pct and n are emitted as-is; the other fields go
                # through numeric_formatting.
654             my %out_hash = (
655             mean => $self->numeric_formatting($mean),
656             stddev => $self->numeric_formatting($stddev),
657             pct_rsd => $self->numeric_formatting($pct_rsd),
658             conf_range => $self->numeric_formatting($conf_half),
659             conf_low => $self->numeric_formatting($conf_low),
660             conf_high => $self->numeric_formatting($conf_high),
661 0 0 0       conf_pct => $self->{_confidence_fraction} + 0,
    0 0        
662             sum => $self->numeric_formatting($sx),
663             sum_squared => $self->numeric_formatting($sxx),
664             min => (!defined($min) || $min eq '-' ? $min : $min + 0),
665             max => (!defined($max) || $max eq '-' ? $max : $max + 0),
666             n => $n,
667             );
668             # my $bug_workaround = "xxx: conf_pct : $out_hash{conf_pct}\n";
669 0 0         $out_hash{median} = $median if ($self->{_median});
670 0 0         if ($self->{_quantile}) {
671 0           foreach (1..($self->{_quantile}-1)) {
672 0           $out_hash{"q$_"} = $q_aref->[$_];
673             };
674             };
675 0 0         if (defined($key_column)) {
676 0           $out_hash{$key_column} = $last_key;
677             };
678              
                # A row is written when there was data, or when there was none
                # but --output-on-no-input was requested.
679 0 0 0       if ($n > 0 || ($n == 0 && $self->{_output_on_no_input})) {
      0        
680 0           $self->{_out}->write_row_from_href(\%out_hash);
681             };
682             }
683              
684             =head2 run
685              
686             $filter->run();
687              
688             Internal: run over each row, for one or many keys.
689              
690             =cut
691             sub run($) {
                # Drive the filter: process one key group per run_one_key() call
                # until no row is left parked for a following group.
692 0     0 1   my($self) = @_;
                # Start with both holdover slots empty.  (The data slot was
                # previously misspelled "_holdove_data", which left the real slot
                # untouched and created a stray hash key.)
693 0           $self->{_holdover_key} = $self->{_holdover_data} = undef;
694 0           for (;;) {
695 0           $self->run_one_key();
                # run_one_key sets _holdover_key only when it stopped at the
                # first row of another group.
696 0 0         last if (!defined($self->{_holdover_key}));
697             };
698             }
699              
700             =head1 AUTHOR and COPYRIGHT
701              
702             Copyright (C) 1991-2015 by John Heidemann
703              
704             This program is distributed under terms of the GNU general
705             public license, version 2. See the file COPYING
706             with the distribution for details.
707              
708             =cut
709              
710             1;