File Coverage

blib/lib/Fsdb/Filter/dbcolstats.pm
Criterion Covered Total %
statement 27 207 13.0
branch 0 126 0.0
condition 0 30 0.0
subroutine 9 21 42.8
pod 6 6 100.0
total 42 390 10.7


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # dbcolstats.pm
5             # Copyright (C) 1991-2015 by John Heidemann
6             # $Id: b8f85fa383507a09ebfc72e644fadd6e1d5ceed0 $
7             #
8             # This program is distributed under terms of the GNU general
9             # public license, version 2. See the file COPYING
10             # in $dblibdir for details.
11             #
12              
13             package Fsdb::Filter::dbcolstats;
14              
15             =head1 NAME
16              
17             dbcolstats - compute statistics on a fsdb column
18              
19             =head1 SYNOPSIS
20              
21             dbcolstats [-amS] [-c ConfidenceFraction] [-q NumberOfQuantiles] column
22              
23             =head1 DESCRIPTION
24              
25             Compute statistics over a COLUMN of data.
26             Records containing non-numeric data are considered null
27             and do not contribute to the stats (with the C<-a> option
28             they are treated as zeros).
29              
30             Confidence intervals are a t-test (+/- (t_{a/2})*s/sqrt(n))
31             and assume the population takes a normal distribution
32             with a small number of samples (< 100).
33              
34             By default,
35             all statistics are computed as a population I<sample> (with an ``n-1'' term),
36             not as representing the whole population (using ``n'').
37             Select between them with B<--sample> or B<--nosample>.
38             When you measure the entire population, use the latter option.
39              
40             The output of this program is probably best looked at after
41             reformatting with L<dblistize(1)>.
42              
43             Dbcolstats runs in O(1) memory. Median or quantile requires sorting the
44             data and invokes dbsort. Sorting will run in constant RAM but
45             O(number of records) disk space. If median or quantile is required
46             and the data is already sorted, dbcolstats will run more efficiently with
47             the -S option.
48              
49              
50             =head1 OPTIONS
51              
52             =over 4
53              
54             =item B<-a> or B<--include-non-numeric>
55              
56             Compute stats over all records (treat non-numeric records
57             as zero rather than just ignoring them).
58              
59             =item B<-c FRACTION> or B<--confidence FRACTION>
60              
61             Specify FRACTION for the confidence interval.
62             Defaults to 0.95 for a 95% confidence factor.
63              
64             =item B<-f FORMAT> or B<--format FORMAT>
65              
66             Specify a L<printf(3)>-style format for output statistics.
67             Defaults to C<%.5g>.
68              
69             =item B<-m> or B<--median>
70              
71             Compute median value. (Will sort data if necessary.)
72             (Median is the quantile for N=2.)
73              
74             =item B<-q N> or B<--quantile N>
75              
76             Compute quantile (quartile when N is 4),
77             or an arbitrary quantile for other values of N,
78             giving the scores that are each 1/Nth of the way across the population.
79              
80             =item B<--sample>
81              
82             Compute I<sample> population statistics
83             (e.g., the sample standard deviation),
84             assuming I<n-1> degrees of freedom.
85              
86             =item B<--nosample>
87              
88             Compute I<full> population statistics
89             (e.g., the population standard deviation).
90              
91             =item B<-S> or B<--pre-sorted>
92              
93             Assume data is already sorted.
94             With one -S, we check and confirm this precondition.
95             When repeated, we skip the check.
96              
97             =item B<--parallelism=N> or C<-j N>
98              
99             Allow sorting to happen in parallel.
100             Defaults on.
101             (Only relevant if using non-pre-sorted data with quantiles.)
102              
103             =item B<-F> or B<--fs> or B<--fieldseparator> S
104              
105             Specify the field (column) separator as C<S>.
106             See L<dbfilealter> for valid field separators.
107              
108             =item B<-T TmpDir>
109              
110             where to put temporary data.
111             Only used if median or quantiles are requested.
112             Also uses environment variable TMPDIR, if -T is
113             not specified.
114             Default is /tmp.
115              
116             =item B<-k KeyField>
117              
118             Do multi-stats, grouped by each key.
119             Assumes keys are sorted. (Use dbmultistats to guarantee sorting order.)
120              
121              
122             =back
123              
124              
125             =for comment
126             begin_standard_fsdb_options
127              
128             This module also supports the standard fsdb options:
129              
130             =over 4
131              
132             =item B<-d>
133              
134             Enable debugging output.
135              
136             =item B<-i> or B<--input> InputSource
137              
138             Read from InputSource, typically a file name, or C<-> for standard input,
139             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
140              
141             =item B<-o> or B<--output> OutputDestination
142              
143             Write to OutputDestination, typically a file name, or C<-> for standard output,
144             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
145              
146             =item B<--autorun> or B<--noautorun>
147              
148             By default, programs process automatically,
149             but Fsdb::Filter objects in Perl do not run until you invoke
150             the run() method.
151             The C<--(no)autorun> option controls that behavior within Perl.
152              
153             =item B<--help>
154              
155             Show help.
156              
157             =item B<--man>
158              
159             Show full manual.
160              
161             =back
162              
163             =for comment
164             end_standard_fsdb_options
165              
166              
167             =head1 SAMPLE USAGE
168              
169             =head2 Input:
170              
171             #fsdb absdiff
172             0
173             0.046953
174             0.072074
175             0.075413
176             0.094088
177             0.096602
178             # | /home/johnh/BIN/DB/dbrow
179             # | /home/johnh/BIN/DB/dbcol event clock
180             # | dbrowdiff clock
181             # | /home/johnh/BIN/DB/dbcol absdiff
182              
183             =head2 Command:
184              
185             cat data.fsdb | dbcolstats absdiff
186              
187             =head2 Output:
188              
189             #fsdb mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n
190             0.064188 0.036194 56.387 0.037989 0.026199 0.10218 0.95 0.38513 0.031271 0 0.096602 6
191             # | /home/johnh/BIN/DB/dbrow
192             # | /home/johnh/BIN/DB/dbcol event clock
193             # | dbrowdiff clock
194             # | /home/johnh/BIN/DB/dbcol absdiff
195             # | dbcolstats absdiff
196             # 0.95 confidence intervals assume normal distribution and small n.
197              
198             =head1 SEE ALSO
199              
200             L<dbmultistats(1)>, handles multiple experiments in a single file.
201              
202             L<dblistize(1)>, to pretty-print the output of dbcolstats.
203              
204             L<dbcolpercentile(1)>, to compute an even more general version of median/quantiles.
205              
206             L<dbcolstatscores(1)>, to compute z-scores or t-scores for each row.
207              
208             L<dbrvstatdiff(1)>, to see if two sample populations are statistically different.
209              
210             L<Fsdb>.
211              
212             =head1 BUGS
213              
214             The algorithms used to compute variance have not been
215             audited to check for numerical stability.
216             (See F<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>.)
217             Variance may be incorrect when standard deviation
218             is small relative to the mean.
219              
220             The field C<conf_pct> implies percentage, but it's actually
221             reported as a fraction (0.95 means 95%).
222              
223             Because of limits of floating point, statistics on numbers of
224             widely different scales may be incorrect.
225             See the test cases F<dbcolstats_extrema> for examples.
226              
227              
228             =head1 CLASS FUNCTIONS
229              
230             =cut
231              
232             @ISA = qw(Fsdb::Filter);
233             ($VERSION) = 2.0;
234              
235 1     1   14155 use strict;
  1         1  
  1         25  
236 1     1   5 use Pod::Usage;
  1         1  
  1         66  
237              
238 1     1   5 use Fsdb::IO::Reader;
  1         1  
  1         15  
239 1     1   2 use Fsdb::IO::Writer;
  1         1  
  1         14  
240 1     1   2 use Fsdb::Filter;
  1         1  
  1         16  
241 1     1   4 use Fsdb::Filter::dbpipeline qw(dbpipeline_sink dbsort);
  1         0  
  1         39  
242 1     1   4 use Fsdb::Support qw($is_numeric_regexp);
  1         0  
  1         73  
243 1     1   4 use Fsdb::Support::TDistribution qw(t_distribution);
  1         1  
  1         37  
244 1     1   8 use Fsdb::Support::NamedTmpfile;
  1         1  
  1         1612  
245              
246              
247             =head2 new
248              
249             $filter = new Fsdb::Filter::dbcolstats(@arguments);
250              
251             Create a new dbcolstats object, taking command-line arguments.
252              
253             =cut
254              
255             sub new($@) {
256 0     0 1   my $class = shift @_;
257 0           my $self = $class->SUPER::new(@_);
258 0           bless $self, $class;
259 0           $self->set_defaults;
260 0           $self->parse_options(@_);
261 0           $self->SUPER::post_new();
262 0           return $self;
263             }
264              
265              
266             =head2 set_defaults
267              
268             $filter->set_defaults();
269              
270             Internal: set up defaults.
271              
272             =cut
273              
274             sub set_defaults($) {
275 0     0 1   my($self) = @_;
276 0           $self->SUPER::set_defaults();
277 0           $self->{_target_column} = undef;
278 0           $self->{_confidence_fraction} = 0.95;
279 0           $self->{_format} = "%.5g";
280 0           $self->{_quantile} = undef;
281 0           $self->{_median} = undef; # special case: renames the output field
282 0           $self->{_sample} = 1;
283 0           $self->{_pre_sorted} = 0;
284 0           $self->{_include_non_numeric} = undef;
285 0           $self->{_fscode} = undef;
286 0           $self->{_max_parallelism} = undef;
287 0           $self->{_key_column} = undef;
288 0           $self->set_default_tmpdir;
289             }
290              
291             =head2 parse_options
292              
293             $filter->parse_options(@ARGV);
294              
295             Internal: parse command-line arguments.
296              
297             =cut
298              
299             sub parse_options($@) {
300 0     0 1   my $self = shift @_;
301              
302 0           my(@argv) = @_;
303             $self->get_options(
304             \@argv,
305 0     0     'help|?' => sub { pod2usage(1); },
306 0     0     'man' => sub { pod2usage(-verbose => 2); },
307             'a|include-non-numeric!' => \$self->{_include_non_numeric},
308             'autorun!' => \$self->{_autorun},
309             'close!' => \$self->{_close},
310             'c|confidence=f' => \$self->{_confidence_fraction},
311             'd|debug+' => \$self->{_debug},
312             'f|format=s' => \$self->{_format},
313             'F|fs|cs|fieldseparator|columnseparator=s' => \$self->{_fscode},
314 0     0     'i|input=s' => sub { $self->parse_io_option('input', @_); },
315             'j|parallelism=i' => \$self->{_max_parallelism},
316             'k|key=s' => \$self->{_key_column},
317             'log!' => \$self->{_logprog},
318             'm|median!' => \$self->{_median},
319 0     0     'o|output=s' => sub { $self->parse_io_option('output', @_); },
320             'q|quantile=i' => \$self->{_quantile},
321             's|sample!' => \$self->{_sample},
322             'S|pre-sorted+' => \$self->{_pre_sorted},
323             'T|tmpdir|tempdir=s' => \$self->{_tmpdir},
324             'saveoutput=s' => \$self->{_save_output},
325 0 0         ) or pod2usage(2);
326 0           $self->parse_target_column(\@argv);
327             }
328              
329             =head2 setup
330              
331             $filter->setup();
332              
333             Internal: setup, parse headers.
334              
335             =cut
336              
337              
338             sub setup($) {
339 0     0 1   my($self) = @_;
340              
341 0 0         pod2usage(2) if (!defined($self->{_target_column}));
342              
343 0 0         print STDERR "dbcolstats: pre-input setup\n" if ($self->{_debug} > 2);
344 0           $self->finish_io_option('input', -comment_handler => $self->create_delay_comments_sub);
345 0 0         print STDERR "dbcolstats: post-input setup\n" if ($self->{_debug} > 2);
346 0           $self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
347             croak $self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n"
348 0 0         if (!defined($self->{_target_coli}));
349 0           $self->{_key_coli} = undef;
350 0 0         if (defined($self->{_key_column})) {
351 0           $self->{_key_coli} = $self->{_in}->col_to_i($self->{_key_column});
352             croak($self->{_prog} . ": key column " . $self->{_key_column} . " is not in input stream.\n")
353 0 0         if (!defined($self->{_key_coli}));
354             };
355 0           my $read_fastpath_sub = $self->{_in}->fastpath_sub();
356 0           $self->{_read_fastpath_sub} = $read_fastpath_sub;
357              
358 0           my(@headers) = (qw(mean stddev pct_rsd conf_range conf_low conf_high
359             conf_pct sum sum_squared min max n));
360 0 0         push(@headers, "median") if ($self->{_median});
361 0 0         if ($self->{_quantile}) {
362 0           foreach (1..($self->{_quantile}-1)) {
363 0           push(@headers, "q$_");
364             };
365             };
366 0 0         unshift(@headers, $self->{_key_column}) if (defined($self->{_key_column}));
367 0 0         print STDERR "dbcolstats: pre-output setup\n" if ($self->{_debug} > 2);
368 0           my @output_options = (-cols => \@headers);
369             unshift (@output_options, -fscode => $self->{_fscode})
370 0 0         if (defined($self->{_fscode}));
371 0           $self->finish_io_option('output', @output_options);
372 0 0         print STDERR "dbcolstats: post-output setup\n" if ($self->{_debug} > 2);
373              
374 0 0 0       if ($self->{_quantile} || $self->{_median}) {
375             croak($self->{_prog} . ": cannot currently do median or quantile with a key column\n")
376 0 0         if (defined($self->{_key_column}));
377 0           $self->{_save_out_filename} = Fsdb::Support::NamedTmpfile::alloc($self->{_tmpdir});
378             # sorting needed?
379 0           my $save_out;
380 0           my(@writer_args) = (-cols => [qw(data)]);
381 0 0         print STDERR "dbcolstats: pre-saveoutput setup\n" if ($self->{_debug} > 2);
382 0 0         if (!$self->{_pre_sorted}) {
383 0           my $sorter_fred;
384 0           my(@dbsort_args) = qw(-n data);
385             push (@dbsort_args, '--parallelism', $self->{_max_parallelism})
386 0 0         if (defined($self->{_max_parallelism}));
387 0 0         print STDERR "dbcolstats: doing sorter thread\n" if ($self->{_debug} > 2);
388             ($save_out, $sorter_fred) = dbpipeline_sink(\@writer_args,
389             '--output' => $self->{_save_out_filename},
390 0           dbsort(@dbsort_args));
391 0           $self->{_sorter_fred} = $sorter_fred;
392             } else {
393             # no, just write it ourselves
394 0           $save_out = new Fsdb::IO::Writer('-file' => $self->{_save_out_filename}, @writer_args);
395             };
396 0           $self->{_save_out} = $save_out;
397 0 0         print STDERR "dbcolstats: post-saveoutput setup\n" if ($self->{_debug} > 2);
398             } else {
399 0 0         print STDERR "dbcolstats: no saveoutput needed\n" if ($self->{_debug} > 2);
400 0           $self->{_save_out} = undef;
401             };
402             }
403              
404             =head2 _round_up
405              
406             $i = _round_up($x);
407              
408             Internal: Round up to the next integer.
409              
410             =cut
411              
412             sub _round_up($) {
413 0     0     my($x) = @_;
414 0           my($xi) = int($x);
415 0 0         return ($x > $xi) ? $xi+1 : $xi;
416             }
417              
418             =head2 _compute_quantile
419              
420             ($median, $quantile_aref) = $self->_compute_quantile($n, $mean);
421              
422             Internal: Compute quantile from the saved data.
423             Not generalizable.
424             We assume the saved output is closed before we enter.
425              
426             =cut
427              
428             sub _compute_quantile($$$) {
429 0     0     my ($self, $n, $mean) = @_;
430              
431 0 0 0       return if (!($self->{_quantile} || $self->{_median}));
432 0           my $effective_quantile = $self->{_quantile};
433 0 0         $effective_quantile = 2 if (!defined($effective_quantile));
434              
435 0           my $median;
436             my @q;
437 0 0         if ($n <= 1) {
438 0           $median = $mean;
439 0           push(@q, ($mean) x $effective_quantile);
440 0           return ($median, \@q);
441             };
442              
443 0           my $save_in = new Fsdb::IO::Reader(-file => $self->{_save_out_filename});
444 0 0         $save_in->error && die $self->{_prog} . ": re-read error " . $save_in->error;
445              
446             # To handle the ugly case of having more ntiles than
447             # data, we detect it and replicate the data until we have more
448             # replicated_data than ntiles.
449 0 0         my($replicate_data) = ($n >= $effective_quantile+1) ? 1 : _round_up(($effective_quantile+1.0)/$n);
450 0           my($replicated_n) = $n * $replicate_data;
451              
452             # Also note that the array of quantiles and the number of
453             # data elements read are both 1-based and not 0-based like
454             # most perl stuff. This is to make the math easier.
455 0           my $median_i = _round_up($replicated_n / 2);
456 0           my $ntile_frac = ($replicated_n + 0.0) / ($effective_quantile + 0.0);
457 0           my($x, $last_x, $next_q_i);
458 0           @q = (0); # note that q is primed with 0 (to fill that zero element)
459 0           my($replicates_left) = 0;
460 0           my($i); # note that i counts from 1!
461 0           for ($i = 1; $#q+1 < $effective_quantile; $i++) {
462 0 0         if (--$replicates_left <= 0) {
463 0           my $fref = $save_in->read_rowobj;
464 0 0         die "internal error re-reading data\n" if (ref($fref) ne 'ARRAY');
465 0           $x = $fref->[0];
466 0           $replicates_left = $replicate_data;
467             # Verify sorted order (in case the user lied to us
468             # about pre-sorting).
469 0 0 0       if (defined($last_x) && $x < $last_x) {
470 0 0         my($info) = ($self->{_pre_sorted} ? " (internal error in dbsort)" : " (user specified -S for pre-sorted data but it is unsorted)");
471 0           die $self->{_prog} . ": cannot process data that is out of order between $last_x and $x $info.\n";
472             };
473 0           $last_x = $x;
474             };
475 0 0         if ($i == $median_i) { $median = $x; };
  0            
476 0 0         $next_q_i = (_round_up($ntile_frac * ($#q + 1.0) )) if (!defined($next_q_i));
477             # print "d: q=$#q nq=$next_q_i i=$i\n";
478 0 0         if ($i == $next_q_i) { push(@q, $x); $next_q_i = undef; };
  0            
  0            
479             };
480 0           return ($median, \@q);
481             };
482              
483              
484             =head2 run_one_key
485              
486             $filter->run_one_key();
487              
488             Internal: run over each row, for a given key.
489              
490             =cut
491             sub run_one_key($) {
492 0     0 1   my($self) = @_;
493              
494 0 0         print STDERR "dbcolstats: starting run\n" if ($self->{_debug} > 2);
495              
496             # xxx: should eval all this to factor out constants from runtime
497 0           my($xf) = $self->{_target_coli};
498 0           my($key_column) = $self->{_key_column};
499              
500 0           my($n) = 0;
501 0           my($sx) = 0;
502 0           my($sxx) = 0;
503 0           my $min;
504             my $max;
505 0           my $key;
506 0           my $last_key = $self->{_holdover_key};
507 0           my $holdover_data = $self->{_holdover_data};
508 0           $self->{_holdover_key} = $self->{_holdover_data} = undef;
509              
510 0           my $fref;
511             my $x;
512              
513             {
514 0           my $save_out = $self->{_save_out};
  0            
515 0           my $read_fastpath_sub = $self->{_read_fastpath_sub};
516              
517 0           my $code = q'
518             while (1) {
519             if (defined($holdover_data)) {
520             $x = $holdover_data; # and key was set earlier
521             $holdover_data = undef;
522             } else {
523             $fref = &{$read_fastpath_sub}();
524             last if (!defined($fref));
525             $x = $fref->[' . $xf . q'];
526             ';
527 0 0         if (defined($self->{_key_column})) {
528             $code .= q'
529 0           $key = $fref->[' . $self->{_key_coli} . '];
530             if (!defined($last_key)) {
531             $last_key = $key;
532             } elsif ($key ne $last_key) {
533             $self->{_holdover_key} = $key;
534             $self->{_holdover_data} = $x;
535             last;
536             };
537             ';
538             };
539 0           $code .= q'
540             };
541             ';
542              
543             $code .= 'next if ($x !~ /' . $is_numeric_regexp . "/);\n"
544 0 0         if (!$self->{_include_non_numeric});
545 0           $code .= q'
546             $x += 0.0; # force numeric
547             $n++;
548             $sx += $x;
549             $sxx += $x * $x;
550             ';
551 0 0         $code .= 'print STDERR "dbcolstats: save-out write\n";' . "\n" if ($self->{_debug} > 2);
552              
553 0 0 0       if ($self->{_quantile} || $self->{_median}) {
554             # note that as of perl-5.14 we must force numeric or perl truncates floats to ints :-(
555 0           $code .= q'
556             my(@row);
557             $row[0] = $x + 0; # force numeric, as guaranteed by above
558             $save_out->write_rowobj(\@row);
559             ';
560             };
561 0 0         $code .= 'print STDERR "dbcolstats: post save-out write\n";' . "\n" if ($self->{_debug} > 2);
562 0           $code .= q'
563             if (!defined($min)) {
564             $min = $max = $x;
565             } else {
566             $min = $x if ($x < $min);
567             $max = $x if ($x > $max);
568             };
569             };';
570              
571             # run it
572 0 0         print STDERR "dbcolstats: eval'ing code\n" if ($self->{_debug});
573 0 0         print $code if ($self->{_debug});
574 0           eval $code;
575 0 0         $@ and die $self->{_prog} . ": internal error in eval.: $@\n";
576              
577             # clean up
578 0 0 0       if ($self->{_quantile} || $self->{_median}) {
579 0 0         print STDERR "dbcolstats: closing save-out\n" if ($self->{_debug} > 2);
580 0           $self->{_save_out}->close;
581 0 0         print STDERR "dbcolstats: post closing save-out\n" if ($self->{_debug} > 2);
582             };
583             }
584              
585             #
586             # Make sure we cleaned up before we do any computation.
587             #
588 0 0         if (defined($self->{_sorter_fred})) {
589             # let sorting finish
590 0 0         print STDERR "dbcolstats: join on sorter thread\n" if ($self->{_debug} > 2);
591 0           $self->{_sorter_fred}->join();
592 0           $self->{_sorter_fred} = undef;
593 0 0         print STDERR "dbcolstats: post join on sorter thread\n" if ($self->{_debug} > 2);
594             };
595              
596             #
597             # Compute stats.
598             #
599 0 0         my $mean = ($n == 0 ? "-" : $sx / $n);
600             # stddev = s, not s^2, approximates omega
601             # Check for special cases:
602             # $n <= 1 => divide by zero
603             # all same data value => can sometimes get very small or negative
604             # stddev (due to rounding error)
605             # for these cases, $stddev = 0
606 0           my $stddev;
607 0 0         if ($n == 0) {
608 0           $stddev = "-";
609             } else {
610             $stddev = ($n <= 1 || $max == $min) ? 0 :
611 0 0 0       sqrt(($sxx - $n * $mean * $mean) / ($n - ($self->{_sample} ? 1 : 0)));
    0          
612             };
613 0           my $pct_rsd;
614 0 0 0       if ($stddev eq '-' || $mean eq '-' || $mean == 0) {
      0        
615 0           $pct_rsd = "-";
616             } else {
617 0           $pct_rsd = ($stddev / $mean) * 100;
618             };
619             #
620             # Confidence intervals from "Probability and Statistics for Engineers",
621             # Second Edition, 1986, Scheaffer and McClave, p. 242.
622             #
623 0           my $conf_half;
624 0 0         if ($n <= 1) {
625 0           $conf_half = "-";
626             } else {
627 0           my $conf_alpha = (1.0 - $self->{_confidence_fraction}) / 2.0;
628 0           $conf_half = t_distribution($n - 1, $conf_alpha) * $stddev / sqrt($n);
629             };
630 0 0         my $conf_low = ($conf_half eq '-' ? '-' : $mean - $conf_half);
631 0 0         my $conf_high = ($conf_half eq '-' ? '-' : $mean + $conf_half);
632              
633             #
634             # Compute median/quantile.
635             #
636 0           my($median, $q_aref) = $self->_compute_quantile($n, $mean);
637              
638             #
639             # Output the results.
640             #
641             # xxx: bug work-around: the +0s on conf_pct, min, max are
642             # because perl-5.14.2-191.fc16.x86_64
643             # truncates the floating-point portion of these values otherwise.
644             #
645             my %out_hash = (
646             mean => $self->numeric_formatting($mean),
647             stddev => $self->numeric_formatting($stddev),
648             pct_rsd => $self->numeric_formatting($pct_rsd),
649             conf_range => $self->numeric_formatting($conf_half),
650             conf_low => $self->numeric_formatting($conf_low),
651             conf_high => $self->numeric_formatting($conf_high),
652 0 0 0       conf_pct => $self->{_confidence_fraction} + 0,
    0 0        
653             sum => $self->numeric_formatting($sx),
654             sum_squared => $self->numeric_formatting($sxx),
655             min => (!defined($min) || $min eq '-' ? $min : $min + 0),
656             max => (!defined($max) || $max eq '-' ? $max : $max + 0),
657             n => $n,
658             );
659             # my $bug_workaround = "xxx: conf_pct : $out_hash{conf_pct}\n";
660 0 0         $out_hash{median} = $median if ($self->{_median});
661 0 0         if ($self->{_quantile}) {
662 0           foreach (1..($self->{_quantile}-1)) {
663 0           $out_hash{"q$_"} = $q_aref->[$_];
664             };
665             };
666 0 0         if (defined($key_column)) {
667 0           $out_hash{$key_column} = $last_key;
668             };
669              
670 0           $self->{_out}->write_row_from_href(\%out_hash);
671             }
672              
673             =head2 run
674              
675             $filter->run();
676              
677             Internal: run over each row, for one or many keys.
678              
679             =cut
680             sub run($) {
681 0     0 1   my($self) = @_;
682 0           $self->{_holdover_key} = $self->{_holdove_data} = undef;
683 0           for (;;) {
684 0           $self->run_one_key();
685 0 0         last if (!defined($self->{_holdover_key}));
686             };
687             }
688              
689             =head1 AUTHOR and COPYRIGHT
690              
691             Copyright (C) 1991-2015 by John Heidemann
692              
693             This program is distributed under terms of the GNU general
694             public license, version 2. See the file COPYING
695             with the distribution for details.
696              
697             =cut
698              
699             1;