File Coverage

blib/lib/Fsdb.pm
Criterion Covered Total %
statement 9 9 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 12 12 100.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # Fsdb.pm
5             #
6             # Copyright (C) 1991-2016 by John Heidemann
7             #
8             # This program is free software; you can redistribute it and/or
9             # modify it under the terms of the GNU General Public License,
10             # version 2, as published by the Free Software Foundation.
11             #
12             # This program is distributed in the hope that it will be useful,
13             # but WITHOUT ANY WARRANTY; without even the implied warranty of
14             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15             # GNU General Public License for more details.
16             #
17             # You should have received a copy of the GNU General Public License along
18             # with this program; if not, write to the Free Software Foundation, Inc.,
19             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20             #
21              
22             package Fsdb;
23              
24 2     2   214032 use warnings;
  2         4  
  2         80  
25 2     2   10 use strict;
  2         3  
  2         41  
26 2     2   1343 use utf8;
  2         25  
  2         9  
27              
28             =encoding utf8
29              
30             =head1 NAME
31              
32             Fsdb - a flat-text database for shell scripting
33              
34              
35             =cut
36             our $VERSION = '2.62';
37              
38             =head1 SYNOPSIS
39              
40             Fsdb, the flatfile streaming database, is a package of commands
41             for manipulating flat-ASCII databases from
42             shell scripts. Fsdb is useful to process medium amounts of data (with
43             very little data you'd do it by hand, with megabytes you might want a
44             real database).
45             Fsdb was known as Jdb from 1991 to Oct. 2008.
46              
47             Fsdb is very good at doing things like:
48              
49             =over 4
50              
51             =item *
52              
53             extracting measurements from experimental output
54              
55             =item *
56              
57             examining data to address different hypotheses
58              
59             =item *
60              
61             joining data from different experiments
62              
63             =item *
64              
65             eliminating/detecting outliers
66              
67             =item *
68              
69             computing statistics on data
70             (mean, confidence intervals, correlations, histograms)
71              
72             =item *
73              
74             reformatting data for graphing programs
75              
76             =back
77              
78             Fsdb is built around the idea of a flat text file as a database.
79             Fsdb files (by convention, with the extension F<.fsdb>),
80             have a header documenting the schema (what the columns mean),
81             and then each line represents a database record (or row).
82              
83             For example:
84              
85             #fsdb experiment duration
86             ufs_mab_sys 37.2
87             ufs_mab_sys 37.3
88             ufs_rcp_real 264.5
89             ufs_rcp_real 277.9
90              
91             Is a simple file with four experiments (the rows),
92             each with a description and a run time
93             in the first and second columns.
94              
95             Rather than hand-code scripts to do each special case, Fsdb provides
96             higher-level functions. Although it's often easy to throw together a
97             custom script to do any single task, I believe that there are several
98             advantages to using Fsdb:
99              
100             =over 4
101              
102             =item *
103              
104             these programs provide a higher level interface than plain Perl, so
105              
106             =over 4
107              
108             =item **
109              
110             Fewer lines of simpler code:
111              
112             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
113              
114             Picks out just one type of experiment and computes statistics on it,
115             rather than:
116              
117             while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; }
118             $mean = $sum / $n; $std_dev = ...
119              
120             in dozens of places.
121              
122             =back
123              
124             =item *
125              
126             the library uses names for columns, so
127              
128             =over 4
129              
130             =item **
131              
132             No more C<$F[1]>, use C<_duration>.
133              
134             =item **
135              
136             New or different order columns? No changes to your scripts!
137              
138             =back
139              
140             Thus if your experiment gets more complicated with a size parameter,
141             so your log changes to:
142              
143             #fsdb experiment size duration
144             ufs_mab_sys 1024 37.2
145             ufs_mab_sys 1024 37.3
146             ufs_rcp_real 1024 264.5
147             ufs_rcp_real 1024 277.9
148             ufs_mab_sys 2048 45.3
149             ufs_mab_sys 2048 44.2
150              
151             Then the previous scripts still work, even though duration is
152             now the third column, not the second.
153              
154             =item *
155              
156             A series of actions are self-documenting (each program records what it does).
157              
158             =over 4
159              
160             =item **
161              
162             No more wondering what hacks were used to compute the
163             final data, just look at the comments at the end
164             of the output.
165              
166             =back
167              
168             For example, the commands
169              
170             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
171              
172             add to the end of the output the lines
173             # | dbrow _experiment eq "ufs_mab_sys"
174             # | dbcolstats duration
175              
176              
177             =item *
178              
179             The library is mature, supporting large datasets,
180             corner cases, error handling, backed by an automated test suite.
181              
182             =over 4
183              
184             =item **
185              
186             No more puzzling about bad output because your custom script
187             skimped on error checking.
188              
189             =item **
190              
191             No more memory thrashing when you try to sort ten million records.
192              
193             =back
194              
195             =item *
196              
197             Fsdb-2.x supports Perl scripting (in addition to shell scripting),
198             with libraries to do Fsdb input and output, and easy support for pipelines.
199             The shell script
200              
201             dbcol name test1 | dbroweval '_test1 += 5;'
202              
203             can be written in perl as:
204              
205             dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;'));
206              
207             =back
208              
209             (The disadvantage is that you need to learn what functions Fsdb provides.)
210              
211             Fsdb is built on flat-ASCII databases. By storing data in simple text
212             files and processing it with pipelines it is easy to experiment (in
213             the shell) and look at the output.
214             To the best of my knowledge, the original implementation of
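215             this idea was C</rdb>, a commercial product described in the book
216             I<UNIX relational database management: application development in the UNIX environment>
217             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and
218             also at the web page L<http://www.rdb.com/>). Fsdb is an incompatible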
219             re-implementation of their idea without any accelerated indexing or
220             forms support. (But it's free, and probably has better statistics!).
221              
222             Fsdb-2.x will exploit multiple processors or cores,
223             and provides Perl-level support for input, output, and threaded-pipelines.
224             (As of Fsdb-2.44 it no longer uses Perl threading, just processes.)
225              
226             Installation instructions follow at the end of this document.
227             Fsdb-2.x requires Perl 5.8 to run.
228             All commands have manual pages and provide usage with the C<--help> option.
229             All commands are backed by an automated test suite.
230              
231             The most recent version of Fsdb is available on the web at
232             L.
233              
234              
235             =head1 WHAT'S NEW
236              
237             =head2 2.62, 2016-11-29
238             A new L and other minor improvements.
239              
240             =over 4
241              
242             =item ENHANCEMENT
243              
244             Documentation for L now includes sample output.
245              
246             =item NEW
247              
248             L converts a specific form of YAML to fsdb.
249              
250             =item BUG FIX
251              
252             The test suite now uses C rather than C
253             to make OpenBSD-5.9 happier, I hope.
254              
255             =item ENHANCEMENT
256              
257             Comments that log operations at the end of each file now do simple
258             quoting of spaces. (It is not guaranteed to be fully shell-compliant.)
259              
260             =item ENHANCEMENT
261              
262             There is a new standard option, C<--header>,
263             allowing one to specify an Fsdb header for inputs that lack it.
264             Currently it is supported by L,
265             L, L, L, L,
266             L.
267              
268             =back
269              
270              
271              
272             =head1 README CONTENTS
273              
274             =over 4
275              
276             =item executive summary
277              
278             =item what's new
279              
280             =item README CONTENTS
281              
282             =item installation
283              
284             =item basic data format
285              
286             =item basic data manipulation
287              
288             =item list of commands
289              
290             =item another example
291              
292             =item a gradebook example
293              
294             =item a password example
295              
296             =item history
297              
298             =item related work
299              
300             =item release notes
301              
302             =item copyright
303              
304             =item comments
305              
306             =back
307              
308              
309             =head1 INSTALLATION
310              
311             Fsdb now uses the standard Perl build and installation from
312             ExtUtils::MakeMaker(3), so the quick answer to installation is to type:
313            
314             perl Makefile.PL
315             make
316             make test
317             make install
318              
319             Or, if you want to install it somewhere else, change the first line to
320              
321             perl Makefile.PL PREFIX=$HOME
322              
323             and it will go in your home directory's F, etc.
324             (See L for more details.)
325              
326             Fsdb requires perl 5.8 or later.
327              
328             A test-suite is available, run it with
329              
330             make test
331              
332             A FreeBSD port to Fsdb is available, see
333             L.
334              
335             A Fink (MacOS X) port is available, see
336             L.
337             (Thanks to Lars Eggert for maintaining this port.)
338              
339              
340             =head1 BASIC DATA FORMAT
341              
342             These programs are based on the idea of storing data in simple ASCII
343             files. A database is a file with one header line and then data or
344             comment lines. For example:
345              
346             #fsdb account passwd uid gid fullname homedir shell
347             johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
348             greg * 2275 134 Greg_Johnson /home/greg /bin/bash
349             root * 0 0 Root /root /bin/bash
350             # this is a simple database
351              
352             The header line must be first and begins with C<#fsdb>.
353             There are rows (records) and columns (fields),
354             just like in a normal database.
355             Comment lines begin with C<#>.
356             Column names are any string not containing spaces or single quote
357             (although it is prudent to keep them alphanumeric with underscore).
358              
359             By default, columns are delimited by whitespace.
360             With this default configuration, the contents of a field
361             cannot contain whitespace.
362             However, this limitation can be relaxed by changing the field separator
363             as described below.
364              
365             The big advantage of simple flat-text databases is that
366             it is usually easy to massage data into this format,
367             and it's reasonably easy to take data out of this
368             format into other (text-based) programs, like gnuplot, jgraph, and
369             LaTeX. Think Unix. Think pipes.
370             (Or even output to Excel and HTML if you prefer.)
371              
372             Since no-whitespace in columns was a problem for some applications,
373             there's an option which relaxes this rule. You can specify the field
374             separator in the table header with C<-F x> where C<x> is
375             a code for the new field separator.
376             A full list of codes is at L,
377             but two common special values are C<-F t>
378             which is a separator of a single tab character,
379             and C<-F S>, a separator of two spaces.
380             Both allow (single) spaces in fields. An example:
381              
382             #fsdb -F S account passwd uid gid fullname homedir shell
383             johnh * 2274 134 John Heidemann /home/johnh /bin/bash
384             greg * 2275 134 Greg Johnson /home/greg /bin/bash
385             root * 0 0 Root /root /bin/bash
386             # this is a simple database
387              
388             See L for more details. Regardless of what the column
389             separator is for the body of the data, it's always whitespace in the
390             header.
391              
392             There's also a third format: a "list". Because it's often hard to see
393             what's in columns past the first two, in list format each "column" is on
394             a separate line. The programs dblistize and dbcolize convert to and
395             from this format, and all programs work with either format.
396             The command
397              
398             dbfilealter -R C < DATA/passwd.fsdb
399              
400             outputs:
401              
402             #fsdb -R C account passwd uid gid fullname homedir shell
403             account: johnh
404             passwd: *
405             uid: 2274
406             gid: 134
407             fullname: John_Heidemann
408             homedir: /home/johnh
409             shell: /bin/bash
410            
411             account: greg
412             passwd: *
413             uid: 2275
414             gid: 134
415             fullname: Greg_Johnson
416             homedir: /home/greg
417             shell: /bin/bash
418            
419             account: root
420             passwd: *
421             uid: 0
422             gid: 0
423             fullname: Root
424             homedir: /root
425             shell: /bin/bash
426            
427             # this is a simple database
428             # | dblistize
429              
430             See L for more details.
431              
432              
433             =head1 BASIC DATA MANIPULATION
434              
435             A number of programs exist to manipulate databases.
436             Complex functions can be made by stringing together commands
437             with shell pipelines. For example, to print the home
438             directories of everyone with ``john'' in their names,
439             you would do:
440              
441             cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir
442              
443             The output might be:
444              
445             #fsdb homedir
446             /home/johnh
447             /home/greg
448             # this is a simple database
449             # | dbrow _fullname =~ /John/
450             # | dbcol homedir
451              
452             (Notice that comments are appended to the output listing each command,
453             providing an automatic audit log.)
454              
455             In addition to typical database functions (select, join, etc.) there
456             are also a number of statistical functions.
457              
458             The real power of Fsdb is that one can apply arbitrary code to rows
459             to do powerful things.
460              
461             cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/'
462              
463             converts "John_Heidemann" into "Heidemann,_John".
464             Not too much more work could split fullname into firstname and lastname
465             fields.
466              
467              
468             =head1 TALKING ABOUT COLUMNS
469              
470             An advantage of Fsdb is that you can talk about columns by name
471             (symbolically) rather than simply by their positions. So in the above
472             example, C<dbcol homedir> pulled out the home directory column, and
473             C<dbrow '_fullname =~ /John/'> matched against column fullname.
474              
475             In general, you can use the name of the column listed on the C<#fsdb> line
476             to identify it in most programs, and _name to identify it in code.
477              
478             Some alternatives for flexibility:
479              
480             =over 4
481              
482             =item *
483              
484             Numeric values identify columns positionally, numbering from 0.
485             So 0 or _0 is the first column, 1 is the second, etc.
486              
487             =item *
488              
489             In code, _last_columnname gets the value from columnname's previous row.
490              
491             =back
492              
493             See L for more details about writing code.
494              
495              
496              
497             =head1 LIST OF COMMANDS
498              
499             Enough said. I'll summarize the commands, and then you can
500             experiment. For a detailed description of each command, see a summary
501             by running it with the argument C<--help> (or C<-?> if you prefer.)
502             Full manual pages can be found by running the command
503             with the argument C<--man>, or running the Unix command C
504             or whatever program you want.
505              
506             =head2 TABLE CREATION
507              
508             =over 4
509              
510             =item dbcolcreate
511              
512             add columns to a database
513              
514             =item dbcoldefine
515              
516             set the column headings for a non-Fsdb file
517              
518             =back
519              
520             =head2 TABLE MANIPULATION
521              
522             =over 4
523              
524             =item dbcol
525              
526             select columns from a table
527              
528             =item dbrow
529              
530             select rows from a table
531              
532             =item dbsort
533              
534             sort rows based on a set of columns
535              
536             =item dbjoin
537              
538             compute the natural join of two tables
539              
540             =item dbcolrename
541              
542             rename a column
543              
544             =item dbcolmerge
545              
546             merge two columns into one
547              
548             =item dbcolsplittocols
549              
550             split one column into two or more columns
551              
552             =item dbcolsplittorows
553              
554             split one column into multiple rows
555              
556             =item dbfilepivot
557              
558             "pivots" a file, converting multiple rows
559             corresponding to the same entity into a single row with multiple columns.
560              
561             =item dbfilevalidate
562              
563             check that db file doesn't have some common errors
564              
565             =back
566              
567             =head2 COMPUTATION AND STATISTICS
568              
569             =over 4
570              
571             =item dbcolstats
572              
573             compute statistics over a column (mean,etc.,optionally median)
574              
575             =item dbmultistats
576              
577             group rows by some key value, then compute stats (mean, etc.) over each group
578             (equivalent to dbmapreduce with dbcolstats as the reducer)
579              
580             =item dbmapreduce
581              
582             group rows (map) and then apply an arbitrary function to each group (reduce)
583              
584             =item dbrvstatdiff
585              
586             compare two sample distributions (mean/conf interval/T-test)
587              
588             =item dbcolmovingstats
589              
590             computing moving statistics over a column of data
591              
592             =item dbcolstatscores
593              
594             compute Z-scores and T-scores over one column of data
595              
596             =item dbcolpercentile
597              
598             compute the rank or percentile of a column
599              
600             =item dbcolhisto
601              
602             compute histograms over a column of data
603              
604             =item dbcolscorrelate
605              
606             compute the coefficient of correlation over several columns
607              
608             =item dbcolsregression
609              
610             compute linear regression and correlation for two columns
611              
612             =item dbrowaccumulate
613              
614             compute a running sum over a column of data
615              
616             =item dbrowcount
617              
618             count the number of rows (a subset of dbcolstats)
619              
620             =item dbrowdiff
621              
622             compute differences between a column in each row of a table
623              
624             =item dbrowenumerate
625              
626             number each row
627              
628             =item dbroweval
629              
630             run arbitrary Perl code on each row
631              
632             =item dbrowuniq
633              
634             count/eliminate identical rows (like Unix uniq(1))
635              
636             =item dbfilediff
637              
638             compare fields on rows of a file (something like Unix diff(1))
639              
640             =back
641              
642             =head2 OUTPUT CONTROL
643              
644             =over 4
645              
646             =item dbcolneaten
647              
648             pretty-print columns
649              
650             =item dbfilealter
651              
652             convert between column or list format, or change the column separator
653              
654             =item dbfilestripcomments
655              
656             remove comments from a table
657              
658             =item dbformmail
659              
660             generate a script that sends form mail based on each row
661              
662             =back
663              
664             =head2 CONVERSIONS
665              
666             (These programs convert data into fsdb. See their web pages for details.)
667              
668             =over 4
669              
670             =item cgi_to_db
671              
672             L
673              
674             =item combined_log_format_to_db
675              
676             L
677              
678             =item html_table_to_db
679              
680             HTML tables to fsdb (assuming they're reasonably formatted).
681              
682             =item kitrace_to_db
683              
684             L
685              
686             =item ns_to_db
687              
688             L
689              
690             =item sqlselect_to_db
691              
692             the output of SQL SELECT tables to db
693              
694             =item tabdelim_to_db
695              
696             spreadsheet tab-delimited files to db
697              
698             =item tcpdump_to_db
699              
700             (see man tcpdump(8) on any reasonable system)
701              
702             =item xml_to_db
703              
704             XML input to fsdb, assuming they're very regular
705              
706              
707             =back
708              
709             (And out of fsdb:)
710              
711             =over 4
712              
713             =item db_to_csv
714              
715             Comma-separated-value format from fsdb.
716              
717             =item db_to_html_table
718              
719             simple conversion of Fsdb to html tables
720              
721             =back
722              
723             =head2 STANDARD OPTIONS
724              
725             Many programs have common options:
726              
727             =over 4
728              
729             =item B<-?> or B<--help>
730              
731             Show basic usage.
732              
733             =item B<-N> or B<--new-name>
734              
735             When a command creates a new column like L's C,
736             this option lets one override the default name of that new column.
737              
738             =item B<-T TmpDir>
739              
740             where to put tmp files.
741             Also uses environment variable TMPDIR, if -T is
742             not specified.
743             Default is /tmp.
746              
747             =item B<-c FRACTION> or B<--confidence FRACTION>
748              
749             Specify confidence interval FRACTION (L, L, etc.)
750              
751             =item B<-C S> or B<--element-separator S>
752              
753             Specify column separator S (L, L).
754              
755             =item B<-d> or B<--debug>
756              
757             Enable debugging (may be repeated for greater effect in some cases).
758              
759             =item B<-a> or B<--include-non-numeric>
760              
761             Compute stats over all data (treating non-numbers as zeros).
762             (By default, things that can't be treated as numbers
763             are ignored for stats purposes)
764              
765             =item B<-S> or B<--pre-sorted>
766              
767             Assume the data is pre-sorted.
768             May be repeated to disable verification (saving a small amount of work).
769              
770             =item B<-e E> or B<--empty E>
771              
772             give value E as the value for empty (null) records
773              
774             =item B<-i I> or B<--input I>
775              
776             Input data from file I.
777              
778             =item B<-o O> or B<--output O>
779              
780             Write data out to file O.
781              
782             =item B<--header> H
783              
784             Use H as the full Fsdb header, rather than reading a header from
785             the input. This option is particularly useful when using Fsdb
786             under Hadoop, where split files don't have headers.
787              
788             =item B<--nolog>
789              
790             Skip logging the program in a trailing comment.
791              
792             =back
793              
794             When giving Perl code (in L<dbrow> and L<dbroweval>)
795             column names can be embedded if preceded by underscores.
796             (Look at L<dbrow> or L<dbroweval> for examples.)
797              
798             Most programs run in constant memory and use temporary files if necessary.
799             Exceptions are L, L, L,
800             L, L.
801              
802              
803             =head1 ANOTHER EXAMPLE
804              
805             Take the raw data in C<DATA/http_bandwidth>,
806             put a header on it (C<dbcoldefine size bw>),
807             take statistics of each category (C<dbmultistats -k size bw>),
808             pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get:
809              
810             #fsdb size mean stddev pct_rsd
811             1024 1.4962e+06 2.8497e+05 19.047
812             10240 5.0286e+06 6.0103e+05 11.952
813             102400 4.9216e+06 3.0939e+05 6.2863
814             # | dbcoldefine size bw
815             # | /home/johnh/BIN/DB/dbmultistats -k size bw
816             # | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd
817              
818             (The whole command was:
819              
820             cat DATA/http_bandwidth |
821             dbcoldefine size bw |
822             dbmultistats -k size bw |
823             dbcol size mean stddev pct_rsd
824              
825             all on one line.)
826              
827             Then post-process them to get rid of the exponential notation
828             by adding this to the end of the pipeline:
829              
830             dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);'
831              
832             (Actually, this step is no longer required since L
833             now uses a different default format.)
834              
835             giving:
836              
837             #fsdb size mean stddev pct_rsd
838             1024 1496200 284970 19.047
839             10240 5028600 601030 11.952
840             102400 4921600 309390 6.2863
841             # | dbcoldefine size bw
842             # | dbmultistats -k size bw
843             # | dbcol size mean stddev pct_rsd
844             # | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); }
845              
846             In a few lines, raw data is transformed to processed output.
847              
848              
849             Suppose you expect there is an odd distribution of results of one
850             datapoint. Fsdb can easily produce a CDF (cumulative distribution
851             function) of the data, suitable for graphing:
852              
853             cat DB/DATA/http_bandwidth | \
854             dbcoldefine size bw | \
855             dbrow '_size == 102400' | \
856             dbcol bw | \
857             dbsort -n bw | \
858             dbrowenumerate | \
859             dbcolpercentile count | \
860             dbcol bw percentile | \
861             xgraph
862              
863             The steps, roughly:
864             1. get the raw input data and turn it into fsdb format,
865             2. pick out just the relevant column (for efficiency) and sort it,
866             3. for each data point, assign a CDF percentage to it,
867             4. pick out the two columns to graph and show them
868              
869              
870             =head1 A GRADEBOOK EXAMPLE
871              
872             The first commercial program I wrote was a gradebook,
873             so here's how to do it with Fsdb.
874              
875             Format your data like DATA/grades.
876              
877             #fsdb name email id test1
878             a a@ucla.example.edu 1 80
879             b b@usc.example.edu 2 70
880             c c@isi.example.edu 3 65
881             d d@lmu.example.edu 4 90
882             e e@caltech.example.edu 5 70
883             f f@oxy.example.edu 6 90
884              
885             Or if your students have spaces in their names, use C<-F S> and two spaces
886             to separate each column:
887              
888             #fsdb -F S name email id test1
889             alfred aho  a@ucla.example.edu  1  80
890             butler lampson  b@usc.example.edu  2  70
891             david clark  c@isi.example.edu  3  65
892             constantine drovolis  d@lmu.example.edu  4  90
893             deborah estrin  e@caltech.example.edu  5  70
894             sally floyd  f@oxy.example.edu  6  90
895              
896             To compute statistics on an exam, do
897              
898             cat DATA/grades | dbstats test1 |dblistize
899              
900             giving
901              
902             #fsdb -R C ...
903             mean: 77.5
904             stddev: 10.84
905             pct_rsd: 13.987
906             conf_range: 11.377
907             conf_low: 66.123
908             conf_high: 88.877
909             conf_pct: 0.95
910             sum: 465
911             sum_squared: 36625
912             min: 65
913             max: 90
914             n: 6
915             ...
916              
917             To do a histogram:
918              
919             cat DATA/grades | dbcolhisto -n 5 -g test1
920              
921             giving
922              
923             #fsdb low histogram
924             65 *
925             70 **
926             75
927             80 *
928             85
929             90 **
930             # | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1
931              
932             Now you want to send out grades to the students by e-mail.
933             Create a form-letter (in the file F<test1.txt>):
934              
935             To: _email (_name)
936             From: J. Random Professor
937             Subject: test1 scores
938              
939             _name, your score on test1 was _test1.
940             86+ A
941             75-85 B
942             70-74 C
943             0-69 F
944              
945             Generate the shell script that will send the mail out:
946              
947             cat DATA/grades | dbformmail test1.txt > test1.sh
948              
949             And run it:
950              
951             sh <test1.sh
952              
953             The last two steps can be combined:
954              
955             cat DATA/grades | dbformmail test1.txt | sh
956              
957             but I like to keep a copy of exactly what I send.
958              
959              
960             At the end of the semester you'll want to compute grade totals and
961             assign letter grades. Both fall out of dbroweval.
962             For example, to compute weighted total grades with a 40% midterm/60%
963             final where the midterm is 84 possible points and the final 100:
964              
965             dbcol -rv total |
966             dbcolcreate total - |
967             dbroweval '
968             _total = .40 * _midterm/84.0 + .60 * _final/100.0;
969             _total = sprintf("%4.2f", _total);
970             if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' |
971             dbcolneaten
972              
973              
974             If you got the data originally from a spreadsheet, save it in
975             "tab-delimited" format and convert it with tabdelim_to_db
976             (run tabdelim_to_db -? for examples).
977              
978              
979             =head1 A PASSWORD EXAMPLE
980              
981             To convert the Unix password file to db:
982              
983             cat /etc/passwd | sed 's/:/  /g' | \
984             dbcoldefine -F S login password uid gid gecos home shell \
985             >passwd.fsdb
986              
987             To convert the group file
988              
989             cat /etc/group | sed 's/:/  /g' | \
990             dbcoldefine -F S group password gid members \
991             >group.fsdb
992              
993             To show the names of the groups that div7-members are in
994             (assuming DIV7 is in the gecos field):
995              
996             cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \
997             dbjoin -i - -i group.fsdb gid | dbcol login group
998              
999              
1000             =head1 SHORT EXAMPLES
1001              
1002             Which Fsdb programs are the most complicated (based on number of test cases)?
1003              
1004             ls TEST/*.cmd | \
1005             dbcoldefine test | \
1006             dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \
1007             dbrowuniq -c | \
1008             dbsort -nr count | \
1009             dbcolneaten
1010              
1011             (Answer: L, then L, L and L.)
1012              
1013              
1014             Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)?
1015              
1016             cat $FILE | dbcolstats -q 4 $COLUMN | dblistize | dbstripcomments
1017              
1018             cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments
1019              
1020              
1021             Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming
1022             there's a common student id in column "id":
1023              
1024             dbcol id hw1 <hw1.fsdb >t.fsdb
1025              
1026             dbjoin -a -e - grades.fsdb t.fsdb id | \
1027             dbsort name | \
1028             dbcolneaten >new_grades.fsdb
1029              
1030              
1031             Merging two fsdb files with the same columns:
1032              
1033             cat file1.fsdb file2.fsdb >output.fsdb
1034              
1035             or if you want to clean things up a bit
1036              
1037             cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb
1038              
1039             or if you want to know where the data came from
1040              
1041             for i in 1 2
1042             do
1043             dbcolcreate source $i < file$i.fsdb
1044             done >output.fsdb
1045              
1046             (assumes you're using a Bourne-shell compatible shell, not csh).
1047            
1048              
1049             =head1 WARNINGS
1050              
1051             As with any tool, one should (which means I) understand
1052             the limits of the tool.
1053              
1054             All Fsdb tools should run in I<constant memory>.
1055             In some cases (such as F with quartiles, where the whole input
1056             must be re-read), programs will spool data to disk if necessary.
1057              
1058             Most tools buffer one or a few lines of data, so memory
1059             will scale with the size of each line.
1060             (So lines with many columns, or when columns have lots of data,
1061             may cause large memory consumption.)
1062              
1063             All Fsdb tools should run in constant or at worst C time.
1064              
1065             All Fsdb tools use normal Perl math routines for computation.
1066             Although I make every attempt to choose numerically stable algorithms
1067             (although I also welcome feedback and suggestions for improvement),
1068             normal rounding due to computer floating point approximations
1069             can result in inaccuracies when data spans a large range of precision.
1070             (See for example the F test cases.)
1071              
1072             Any requirements and limitations of each Fsdb tool
1073             are documented on its manual page.
1074              
1075             If any Fsdb program violates these assumptions,
1076             that is a bug that should be documented
1077             on the tool's manual page or ideally fixed.
1078              
1079             Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have
1080             some bugs. Fsdb should work on perl from version 5.10 onward.
1081              
1082              
1083             =head1 HISTORY
1084              
1085             There have been three versions of Fsdb;
1086             fsdb 1.0 is a complete re-write of the pre-1995 versions,
1087             and was
1088             distributed from 1995 to 2007.
1089             Fsdb 2.0 is a significant re-write of the 1.x versions
1090             for reasons described below.
1091              
1092             Fsdb (in its various forms) has been used extensively by its author
1093             since 1991. Since 1995 it's been used by two other researchers at
1094             UCLA and several at ISI. In February 1998 it was announced to the
1095             Internet. Since then it has found a few users, some outside where I
1096             work.
1097              
1098             =head2 Fsdb 2.0 Rationale
1099              
1100             I've thought about fsdb-2.0 for many years, but it was started
1101             in earnest in 2007. Fsdb-2.0 has the following goals:
1102              
1103             =over 4
1104              
1105             =item in-one-process processing
1106              
1107             While fsdb is great on the Unix command line as a pipeline between
1108             programs, it should I<also> be possible to set it up to run in a single
1109             process. And if it does so, it should be able to avoid serializing
1110             and deserializing (converting to and from text) data between each module.
1111             (Accomplished in fsdb-2.0: see L, although still needs tuning.)
1112              
1113             =item clean IO API
1114              
1115             Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is
1116             very, very crufty. More than just being ugly (but it was that too),
1117             this made things like reading from one format file and writing to another
1118             the application's job, when it should be the library's.
1119             (Accomplished in fsdb-1.15 and improved in 2.0: see L.)
1120              
1121             =item normalized module APIs
1122              
1123             Because fsdb modules were added as needed over 10 years,
1124             sometimes the module APIs became inconsistent.
1125             (For example, the 1.x C<dbcolcreate> required an empty
1126             value following the name of the new column,
1127             but other programs specify empty values with the C<-e> argument.)
1128             We should smooth over these inconsistencies.
1129             (Accomplished as each module was ported in 2.0 through 2.7.)
1130              
1131             =item everyone handles all input formats
1132              
1133             Given a clean IO API, the distinction between "colized"
1134             and "listized" fsdb files should go away. Any program
1135             should be able to read and write files in any format.
1136             (Accomplished in fsdb-2.1.)
1137              
1138             =back
1139              
1140             Fsdb-2.0 preserves backwards compatibility where possible,
1141             but breaks it where necessary to accomplish the above goals.
1142             In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions.
1143             Benchmarking in 2013 showed that threading performed much worse than
1144             just using pipes, so Fsdb-2.44 uses threading "style",
1145             but implemented with processes (via my "Freds" library).
1146              
1147             =head2 Contributors
1148              
1149             Fsdb includes code ported from Geoff Kuenning (C).
1150              
1151             Fsdb contributors:
1152             Ashvin Goel F,
1153             Geoff Kuenning F,
1154             Vikram Visweswariah F,
1155             Kannan Varadhan F,
1156             Lars Eggert F,
1157             Arkadi Gelfond F,
1158             David Graff F,
1159             Haobo Yu F,
1160             Pavlin Radoslavov F,
1161             Graham Phillips,
1162             Yuri Pradkin,
1163             Alefiya Hussain,
1164             Ya Xu,
1165             Michael Schwendt,
1166             Fabio Silva F,
1167             Jerry Zhao F,
1168             Ning Xu F,
1169             Martin Lukac F,
1170             Xue Cai,
1171             Michael McQuaid,
1172             Christopher Meng,
1173             Calvin Ardi,
1174             H. Merijn Brand,
1175             Lan Wei.
1176              
1177             Fsdb includes datasets contributed from NIST (F),
1178             from
1179             L,
1180             the NIST/SEMATECH e-Handbook of Statistical Methods, section
1181             1.4.2.8.1. Background and Data. The source is public domain, and
1182             reproduced with permission.
1183              
1184              
1185              
1186              
1187             =head1 RELATED WORK
1188              
1189             As stated in the introduction, Fsdb is an incompatible reimplementation
1190             of the ideas found in C</rdb>. By storing data in simple text files and
1191             processing it with pipelines it is easy to experiment (in the shell)
1192             and look at the output. The original implementation of this idea was
1193             /rdb, a commercial product described in the book I<UNIX relational
1194             database management: application development in the UNIX environment>
1195             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web
1196             page L).
1197              
1198             While Fsdb is inspired by Rdb, it includes no code from it,
1199             and Fsdb makes several different design choices.
1200             In particular: rdb attempts to be closer to a "real" database,
1201             with provision for locking and file indexing.
1202             Fsdb focuses on single user use and so eschews these choices.
1203             Rdb also has some support for interactive editing.
1204             Fsdb leaves editing to text editors like emacs or vi.
1205              
1206             In August, 2002 I found out Carlo Strozzi extended RDB with his
1207             package NoSQL L. According to
1208             Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of
1209             RDB. Although I haven't found Perl startup overhead to be a big
1210             problem on my platforms (from old Sparcstation IPCs to 2GHz
1211             Pentium-4s), you may want to evaluate his system.
1212             The Linux Journal has a description of NoSQL
1213             at L.
1214             It seems quite similar to Fsdb.
1215             Like /rdb, NoSQL supports indexing (not present in Fsdb).
1216             Fsdb appears to have richer support for statistics,
1217             and, as of Fsdb-2.x, its support for Perl threading may support
1218             faster performance (one-process, less serialization and deserialization).
1219              
1220              
1221             =head1 RELEASE NOTES
1222              
1223             Versions prior to 1.0 were released informally on my web page
1224             but were not announced.
1225              
1226             =head2 0.0 1991
1227              
1228             started for my own research use
1229              
1230             =head2 0.1 26-May-94
1231              
1232             first check-in to RCS
1233              
1234             =head2 0.2 15-Mar-95
1235              
1236             parts now require perl5
1237              
1238             =head2 1.0, 22-Jul-97
1239              
1240             adds autoconf support and a test script.
1241              
1242             =head2 1.1, 20-Jan-98
1243              
1244             support for double space field separators, better tests
1245              
1246             =head2 1.2, 11-Feb-98
1247              
1248             minor changes and release on comp.lang.perl.announce
1249              
1250             =head2 1.3, 17-Mar-98
1251              
1252             =over 4
1253              
1254             =item *
1255             adds median and quartile options to dbstats
1256              
1257              
1258             =item *
1259              
1260             adds dmalloc_to_db converter
1261              
1262              
1263             =item *
1264              
1265             fixes some warnings
1266              
1267              
1268             =item *
1269              
1270             dbjoin now can run on unsorted input
1271              
1272              
1273             =item *
1274              
1275             fixes a dbjoin bug
1276              
1277              
1278             =item *
1279              
1280             some more tests in the test suite
1281              
1282             =back
1283              
1284             =head2 1.4, 27-Mar-98
1285              
1286             =over 4
1287              
1288             =item *
1289              
1290             improves error messages (all should now report the program that makes the error)
1291              
1292             =item *
1293              
1294             fixed a bug in dbstats output when the mean is zero
1295              
1296             =back
1297              
1298             =head2 1.5, 25-Jun-98
1299              
1300             =over 4
1301              
1302             =item BUG FIX
1303             dbcolhisto, dbcolpercentile now handle non-numeric values like dbstats
1304              
1305             =item NEW
1306             dbcolstats computes zscores and tscores over a column
1307              
1308             =item NEW
1309             dbcolscorrelate computes correlation coefficients between two columns
1310              
1311             =item INTERNAL
1312             ficus_getopt.pl has been replaced by DbGetopt.pm
1313              
1314             =item BUG FIX
1315             all tests are now ``portable'' (previously some tests ran only on my system)
1316              
1317             =item BUG FIX
1318             you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond)
1319              
1320             =item BUG FIX
1321             installation no longer uses cp -f (to work on SunOS 4)
1322              
1323             =back
1324              
1325             =head2 1.6, 24-May-99
1326              
1327             =over 4
1328              
1329             =item NEW
1330             dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary)
1331              
1332             =item NEW
1333             dbcolmovingstats does moving means over a series of data
1334              
1335             =item NEW
1336             dbcol has a -v option to get all columns except those listed
1337              
1338             =item NEW
1339             dbmultistats does quartiles and medians
1340              
1341             =item NEW
1342             dbstripextraheaders now also cleans up bogus comments before the first header
1343              
1344             =item BUG FIX
1345             dbcolneaten works better with double-space-separated data
1346              
1347             =back
1348              
1349             =head2 1.7, 5-Jan-00
1350              
1351             =over 4
1352              
1353             =item NEW
1354             dbcolize now detects and rejects lines that contain embedded copies of the field separator
1355              
1356             =item NEW
1357             configure tries harder to prevent people from improperly configuring/installing fsdb
1358              
1359             =item NEW
1360             tcpdump_to_db converter (incomplete)
1361              
1362             =item NEW
1363             tabdelim_to_db converter: from spreadsheet tab-delimited files to db
1364              
1365             =item NEW
1366             mailing lists for fsdb are C and C
1367              
1368             To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message.
1369              
1370             =item BUG FIX
1371             dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case.
1372              
1373             =item BUG FIX
1374             the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency.
1375              
1376             =item BUG FIX (DOCUMENTATION)
1377             dbcolmovingstats had incorrect sample output.
1378              
1379             =back
1380              
1381             =head2 1.8, 28-Jun-00
1382              
1383             =over 4
1384              
1385             =item BUG FIX
1386             header options are now preserved when writing with dblistize
1387              
1388             =item NEW
1389             dbrowuniq now optionally checks for uniqueness only on certain fields
1390              
1391             =item NEW
1392             dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields
1393              
1394             =item NEW
1395             converter for "crl" format network traces
1396              
1397             =item NEW
1398             anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo.
1399              
1400             =item OPTIMIZATION
1401             comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.)
1402              
1403             =item BUG FIX
1404             dbstats with median/quartiles now correctly handles singleton data points.
1405              
1406             =back
1407              
1408             =head2 1.9, 6-Nov-00
1409              
1410             =over 4
1411              
1412             =item NEW
1413             dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov).
1414              
1415             =item BUG FIX
1416             dbsort now works with perl-5.6
1417              
1418             =back
1419              
1420             =head2 1.10, 10-Apr-01
1421              
1422             =over 4
1423              
1424             =item BUG FIX
1425             dbstats now handles the case where there are more n-tiles than data
1426              
1427             =item NEW
1428             dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu)
1429              
1430             =item BUG FIX
1431             dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu)
1432              
1433             =item BUG FIX
1434             cleanup of temporary files is slightly better
1435              
1436             =back
1437              
1438             =head2 1.11, 2-Nov-01
1439              
1440             =over 4
1441              
1442             =item BUG FIX
1443             dbcolneaten now runs in constant memory
1444              
1445             =item NEW
1446             dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be
1447              
1448             =item OPTIMIZATION
1449             dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001)
1450              
1451             =item INTERNAL
1452             t_distr now ported to perl5 module DbTDistr
1453              
1454             =back
1455              
1456             =head2 1.12, 30-Oct-02
1457              
1458             =over 4
1459              
1460             =item BUG FIX
1461             dbmultistats documentation typo fixed
1462              
1463             =item NEW
1464             dbcolmultiscale
1465              
1466             =item NEW
1467             dbcol has -r option for "relaxed error checking"
1468              
1469             =item NEW
1470             dbcolneaten has new -e option to strip end-of-line spaces
1471              
1472             =item NEW
1473             dbrow finally has a -v option to negate the test
1474              
1475             =item BUG FIX
1476             math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases)
1477              
1478             =item BUG FIX
1479             some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.)
1480              
1481             =item BUG FIX
1482             fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem)
1483              
1484             =item NEW
1485             db_to_html_table: simple conversion to html tables (NO fancy stuff)
1486              
1487             =item NEW
1488             dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML
1489              
1490             =back
1491              
1492              
1493             =head2 1.13, 4-Feb-04
1494              
1495              
1496             =over 4
1497              
1498             =item NEW
1499             fsdb added to the freebsd ports tree L. Maintainer: C
1500              
1501             =item BUG FIX
1502             properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C.
1503              
1504             =item NEW
1505             dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README.
1506              
1507             =item NEW
1508             cgi_to_db converts CGI.pm-format storage to fsdb list format
1509              
1510             =item BUG FIX
1511             handle numeric synonyms for column names in dbcol properly
1512              
1513             =item ENHANCEMENT
1514             "talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert.
1515              
1516             =item CHANGE
1517             dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root)
1518              
1519             =item NEW
1520             on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode
1521              
1522             =item NEW
1523             dbfilevalidate: check a db file for some common errors
1524              
1525             =back
1526              
1527              
1528             =head2 1.14, 24-Aug-06
1529              
1530             =over 4
1531              
1532              
1533             =item ENHANCEMENT
1534             README cleanup
1535              
1536             =item INCOMPATIBLE CHANGE
1537             dbcolsplit renamed dbcolsplittocols
1538              
1539             =item NEW
1540             dbcolsplittorows split one column into multiple rows
1541              
1542             =item NEW
1543             dbcolsregression compute linear regression and correlation for two columns
1544              
1545             =item ENHANCEMENT
1546             csv_to_db: better error handling, normalize field names, skip blank lines
1547              
1548             =item ENHANCEMENT
1549             dbjoin now detects (and fails) if non-joined files have duplicate names
1550              
1551             =item BUG FIX
1552             minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors)
1553              
1554             =back
1555              
1556             =head2 1.15, 12-Nov-07
1557              
1558             =over 4
1559              
1560             =item NEW
1561             fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.)
1562              
1563             =item NEW
1564             Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4.
1565              
1566             =item NEW
1567             dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats.
1568              
1569             =item ENHANCEMENT
1570             fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace.
1571              
1572             =item ENHANCEMENT
1573             dblib.pl is now moved to Fsdb::Old.pm.
1574              
1575             =item BUG FIX
1576             dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!).
1577              
1578             =item ENHANCEMENT
1579             dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.)
1580              
1581             =item ENHANCEMENT
1582             adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way).
1583              
1584             =item BUG FIX
1585             dbmultistats memory usage corrected (O(# tags), not O(1))
1586              
1587             =item ENHANCEMENT
1588             dbmultistats can now optionally run with pre-grouped input in O(1) memory
1589              
1590             =item ENHANCEMENT
1591             dbroweval -N was finally implemented (eat comments)
1592              
1593             =back
1594              
1595             =head2 2.0, 25-Jan-08
1596              
1597             2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete)
1598              
1599             =over 4
1600              
1601             =item ENHANCEMENT:
1602             shifting old programs to Perl modules, with
1603             the front-end program as just a wrapper.
1604             In the short-term, this change just means programs have real man pages.
1605             In the long-run, it will mean that one can run a pipeline in a single
1606             Perl program.
1607             So far:
1608             L,
1609             L,
1610             the new L.
1611             L
1612             the new L,
1613             the old C (renamed L),
1614             L,
1615             L,
1616              
1617             =item NEW:
1618             L is an internal-only module that lets one
1619             use fsdb commands from within perl (via threads).
1620              
1621             It also provides perl function aliases for the internal modules,
1622             so a string of fsdb commands in perl are nearly as terse as in the
1623             shell:
1624              
1625             use Fsdb::Filter::dbpipeline qw(:all);
1626             dbpipeline(
1627             dbrow(qw(name test1)),
1628             dbroweval('_test1 += 5;')
1629             );
1630              
1631             =item INCOMPATIBLE CHANGE:
1632             The old L has been renamed L.
1633             The new L does the same thing as the old L.
1634             This incompatibility is unfortunate but normalizes program names.
1635              
1636             =item CHANGE:
1637             The new L program
1638             always outputs C<-> (the default empty value) for
1639             statistics it cannot compute (for example, standard deviation
1640             if there is only one row),
1641             instead of the old mix of C<-> and "na".
1642              
1643             =item INCOMPATIBLE CHANGE:
1644             The old L program, now called L,
1645             also has different arguments. The C<-t mean,stddev> option is now
1646             C<--tmean mean --tstddev stddev>. See L for details.
1647              
1648             =item INCOMPATIBLE CHANGE:
1649             L now assumes all new columns get the default
1650             value rather than requiring each column to have an initial constant value.
1651             To change the initial value, use the new C<-e> option.
1652              
1653             =item NEW:
1654             L counts rows, an almost-subset of L's C output
1655             (except without differentiating numeric/non-numeric input),
1656             or the equivalent of C.
1657              
1658             =item NEW:
1659             L merges two sorted files.
1660             This functionality was previously embedded in L.
1661              
1662             =item INCOMPATIBLE CHANGE:
1663             L's C<-i> option to include non-matches
1664             is now renamed C<-a>, so as to not conflict with the new
1665             standard option C<-i> for input file.
1666              
1667             =back
1668              
1669             =head2 2.1, 6-Apr-08
1670              
1671             2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format
1672              
1673             =over 4
1674              
1675             =item ENHANCEMENT:
1676             shifting more old programs to Perl modules.
1677             New in 2.1:
1678             L,
1679             L,
1680             L,
1681             L,
1682             L,
1683             L
1684              
1685             =item ENHANCEMENT
1686             L now handles an arbitrary number of input files,
1687             not just exactly two.
1688              
1689             =item NEW
1690             L is an internal routine that handles merging exactly two files.
1691              
1692             =item INCOMPATIBLE CHANGE
1693             L now specifies inputs like L,
1694             rather than assuming the first two arguments were tables (as in fsdb-1).
1695              
1696             The old L argument C<-i> is now C<-a> or C<--type=outer>.
1697              
1698             A minor change: comments in the source files for
1699             L are now intermixed with output
1700             rather than being delayed until the end.
1701              
1702             =item ENHANCEMENT
1703             L now no longer produces warnings when null values are
1704             passed to numeric comparisons.
1705              
1706             =item BUG FIX
1707             L now once again works with code that lacks a trailing semicolon.
1708             (This bug fixes a regression from 1.15.)
1709              
1710             =item INCOMPATIBLE CHANGE
1711             L's old C<-e> option (to avoid end-of-line spaces) is now C<-E>
1712             to avoid conflicts with the standard empty field argument.
1713              
1714             =item INCOMPATIBLE CHANGE
1715             L's old C<-e> option is now C<-E> to avoid conflicts.
1716             And its C<-n>, C<-s>, and C<-w> are now
1717             C<-N>, C<-S>, and C<-W> to correspond.
1718              
1719             =item NEW
1720             L replaces L, L, and L,
1721             but with different options.
1722              
1723             =item ENHANCEMENT
1724             The library routines C now understand both list-format
1725             and column-format data, so all converted programs can now
1726             I read either format. This capability was one
1727             of the milestone goals for 2.0, so yea!
1728              
1729             =back
1730              
1731             =head2 2.2, 23-May-08
1732              
1733             Release 2.2 is another 2.x alpha release. Now I<most> of the
1734             commands are ported, but a few remain, and I plan one last
1735             incompatible change (to the file header) before 2.x final.
1736              
1737             =over 4
1738              
1739             =item ENHANCEMENT
1740              
1741             shifting more old programs to Perl modules.
1742             New in 2.2:
1743             L,
1744             L.
1745             L.
1746             L.
1747             L.
1748             L.
1749             L.
1750             L.
1751             L.
1752             L.
1753             L.
1754             Also
1755             L
1756             exists only as a front-end (command-line) program.
1757              
1758             =item INCOMPATIBLE CHANGE
1759              
1760             The following programs have been dropped from fsdb-2.x:
1761             L,
1762             L,
1763             L,
1764             L.
1765              
1766             =item NEW
1767              
1768             L to convert Apache logfiles
1769              
1770             =item INCOMPATIBLE CHANGE
1771              
1772             Options to L are now B<-B> and B<-I>,
1773             not B<-a> and B<-i>.
1774              
1775             =item INCOMPATIBLE CHANGE
1776              
1777             L is now L.
1778              
1779             =item BUG FIXES
1780              
1781             L better handles empty columns;
1782             L warning suppressed (actually a bug in high-bucket handling).
1783              
1784             =item INCOMPATIBLE CHANGE
1785              
1786             L now requires a C<-k> option in front of the
1787             key (tag) field, or if none is given, it will group by the first field
1788             (both like L).
1789              
1790             =item KNOWN BUG
1791              
1792             L with quantile option doesn't work currently.
1793              
1794             =item INCOMPATIBLE CHANGE
1795              
1796             L is renamed L.
1797              
1798             =item BUG FIXES
1799              
1800             L was leaving its log message as a command, not a comment.
1801             Oops. No longer.
1802              
1803             =back
1804              
1805             =head2 2.3, 27-May-08 (alpha)
1806              
1807             Another alpha release, this one just to fix the critical dbjoin bug
1808             listed below (that happens to have blocked my MP3 jukebox :-).
1809              
1810             =over 4
1811              
1812             =item BUG FIX
1813              
1814             Dbsort no longer hangs if given an input file with no rows.
1815              
1816             =item BUG FIX
1817              
1818             Dbjoin now works with unsorted input coming from a pipeline (like stdin).
1819             Perl-5.8.8 has a bug (?) that was making this case fail---opening
1820             stdin in one thread, reading some, then reading more in a different
1821             thread caused an lseek which works on files, but fails on pipes like stdin.
1822             Go figure.
1823              
1824             =item BUG FIX / KNOWN BUG
1825              
1826             The dbjoin fix also fixed dbmultistats -q
1827             (it now gives the right answer).
1828             Although a new bug appeared, messages like:
1829             Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction.
1830             So the dbmultistats_quartile test is still disabled.
1831              
1832             =back
1833              
1834             =head2 2.4, 18-Jun-08
1835              
1836             Another alpha release, mostly to fix minor usability
1837             problems in dbmapreduce and client functions.
1838              
1839             =over 4
1840              
1841             =item ENHANCEMENT
1842              
1843             L now defaults to running user supplied code without warnings
1844             (as with fsdb-1.x).
1845             Use C<--warnings> or C<-w> to turn them back on.
1846              
1847             =item ENHANCEMENT
1848              
1849             L can now write different format output
1850             than the input, using the C<-m> option.
1851              
1852             =item KNOWN BUG
1853              
1854             L emits warnings on perl 5.10.0
1855             about "Unbalanced string table refcount" and "Scalars leaked"
1856             when run with an external program as a reducer.
1857              
1858             L emits the warning "Attempt to free unreferenced scalar"
1859             when run with quartiles.
1860              
1861             In each case the output is correct.
1862             I believe these can be ignored.
1863              
1864             =item CHANGE
1865              
1866             L no longer logs a line for each reducer that is invoked.
1867              
1868             =back
1869              
1870              
1871             =head2 2.5, 24-Jun-08
1872              
1873             Another alpha release, fixing more minor bugs in
1874             C and lossage in C.
1875              
1876             =over 4
1877              
1878             =item ENHANCEMENT
1879              
1880             L can now tolerate non-map-aware reducers
1881             that pass back the key column in put.
1882             It also passes the current key as the last argument to
1883             external reducers.
1884              
1885             =item BUG FIX
1886              
1887             L, correctly handle C<-header> option again.
1888             (Broken since fsdb-2.3.)
1889              
1890             =back
1891              
1892              
1893             =head2 2.6, 11-Jul-08
1894              
1895             Another alpha release, needed to fix DaGronk.
1896             One new port, small bug fixes, and important fix to L.
1897              
1898             =over 4
1899              
1900             =item ENHANCEMENT
1901              
1902             shifting more old programs to Perl modules.
1903             New in 2.6:
1904             L.
1905              
1906             =item INCOMPATIBLE CHANGE and ENHANCEMENTS
1907             L arguments changed,
1908             use C<--rank> to require ranking instead of C<-r>.
1909             Also, C<--ascending> and C<--descending> can now be specified separately,
1910             both for C<--percentile> and C<--rank>.
1911              
1912             =item BUG FIX
1913              
1914             Sigh, the sense of the --warnings option in L was inverted. No longer.
1915              
1916             =item BUG FIX
1917              
1918             I found and fixed the string leaks (errors like "Unbalanced string
1919             table refcount" and "Scalars leaked") in L and L.
1920             (All Cs in threads must be manually destroyed.)
1921              
1922             =item BUG FIX
1923              
1924             The C<-C> option to specify the column separator in L
1925             now works again (broken since it was ported).
1926              
1927             =back
1928              
1929             =head2 2.7, 30-Jul-08 beta
1930              
1931             The beta release of fsdb-2.x. Finally, all programs are ported.
1932             As statistics, the number of lines of non-library code doubled from
1933             7.5k to 15.5k. The libraries are much more complete,
1934             going from 866 to 5164 lines.
1935             The overall number of programs is about the same,
1936             although 19 were dropped and 11 were added.
1937             The number of test cases has grown from 116 to 175.
1938             All programs are now in perl-5, no more shell scripts or perl-4.
1939             All programs now have manual pages.
1940              
1941             Although this is a major step forward, I still expect
1942             to rename "jdb" to "fsdb".
1943              
1944             =over 4
1945              
1946             =item ENHANCEMENT
1947              
1948             shifting more old programs to Perl modules.
1949             New in 2.7:
1950             L.
1951             L.
1952             L.
1953             L.
1954             L.
1955             L,
1956             L,
1957             L,
1958             L,
1959             L,
1960             L.
1961              
1962             =item INCOMPATIBLE CHANGE
1963              
1964             The following programs have been dropped from fsdb-2.x:
1965             L,
1966             L,
1967             L.
1968             L.
1969             They may come back, but seemed overly specialized.
1970             The following program
1971             L
1972             was dropped because it is superseded by L.
1973             L
1974             was dropped pending test cases and examples.
1975              
1976             =item ENHANCEMENT
1977              
1978             L now has a C<-c> option to correct errors.
1979              
1980             =item NEW
1981              
1982             L provides the inverse of
1983             L.
1984              
1985             =back
1986              
1987              
1988             =head2 2.8, 5-Aug-08
1989              
1990             Change header format, preserving forwards compatibility.
1991              
1992             =over 4
1993              
1994             =item BUG FIX
1995              
1996             Complete editing pass over the manual, making sure it aligns
1997             with fsdb-2.x.
1998              
1999             =item SEMI-COMPATIBLE CHANGE
2000              
2001             The header of fsdb files has changed, it is now #fsdb, not #h (or #L)
2002             and parsing of -F and -R are also different.
2003             See L for the new specification.
2004             The v1 file format will be read, compatibly, but
2005             not written.
2006              
2007             =item BUG FIX
2008              
2009             L now tolerates comments that precede the first key,
2010             instead of failing with an error message.
2011              
2012             =back
2013              
2014              
2015             =head2 2.9, 6-Aug-08
2016              
2017             Still in beta; just a quick bug-fix for L.
2018              
2019             =over 4
2020              
2021             =item ENHANCEMENT
2022              
2023             L now generates plausible output when given no rows
2024             of input.
2025              
2026             =back
2027              
2028             =head2 2.10, 23-Sep-08
2029              
2030             Still in beta, but picking up some bug fixes.
2031              
2032             =over 4
2033              
2034             =item ENHANCEMENT
2035              
2036             L now generates plausible output when given no rows
2037             of input.
2038              
2039             =item ENHANCEMENT
2040              
2041             L the warnings option was backwards;
2042             now corrected. As a result, warnings in user code now default off
2043             (like in fsdb-1.x).
2044              
2045             =item BUG FIX
2046              
2047             L now defaults to assuming the target column is numeric.
2048             The new option C<-N> allows selection of a non-numeric target.
2049              
2050             =item BUG FIX
2051              
2052             L now includes C<--sample> and C<--nosample> options
2053             to compute the sample or full population correlation coefficients.
2054             Thanks to Xue Cai for finding this bug.
2055              
2056             =back
2057              
2058              
2059             =head2 2.11, 14-Oct-08
2060              
2061             Still in beta, but picking up some bug fixes.
2062              
2063             =over 4
2064              
2065             =item ENHANCEMENT
2066              
2067             L is now more aggressive about filling in empty cells
2068             with the official empty value, rather than leaving them blank or as whitespace.
2069              
2070             =item ENHANCEMENT
2071              
2072             L now catches failures during pipeline element setup
2073             and exits reasonably gracefully.
2074              
2075             =item BUG FIX
2076              
2077             L now reaps child processes, thus avoiding
2078             running out of processes when used a lot.
2079              
2080             =back
2081              
2082             =head2 2.12, 16-Oct-08
2083              
2084             Finally, a full (non-beta) 2.x release!
2085              
2086             =over 4
2087              
2088             =item INCOMPATIBLE CHANGE
2089              
2090             Jdb has been renamed Fsdb, the flatfile-streaming database.
2091             This change affects all internal Perl APIs,
2092             but no shell command-level APIs.
2093             While Jdb served well for more than ten years,
2094             it is easily confused with the Java debugger (even though Jdb was there first!).
2095             It also is too generic to work well in web search engines.
2096             Finally, Jdb stands for ``John's database'', and we're a bit beyond that.
2097             (However, some call me the ``file-system guy'', so
2098             one could argue it retains that meaning.)
2099              
2100             If you just used the shell commands, this change should not affect you.
2101             If you used the Perl-level libraries directly in your code,
2102             you should be able to rename "Jdb" to "Fsdb" to move to 2.12.
2103              
2104             The jdb-announce list has not yet been renamed, but it will be shortly.
2105              
2106             With this release I've accomplished everything I wanted to
2107             in fsdb-2.x. I therefore expect to return to boring, bugfix releases.
2108              
2109             =back
2110              
2111             =head2 2.13, 30-Oct-08
2112              
2113             =over 4
2114              
2115             =item BUG FIX
2116              
2117             L now treats non-numeric data as zero by default.
2118              
2119             =item BUG FIX
2120              
2121             Fixed a perl-5.10ism in L that
2122             breaks that program under 5.8.
2123             Thanks to Martin Lukac for reporting the bug.
2124              
2125             =back
2126              
2127             =head2 2.14, 26-Nov-08
2128              
2129             =over 4
2130              
2131             =item BUG FIX
2132              
2133             Improved documentation for L's C<-f> option.
2134              
2135             =item ENHANCEMENT
2136              
2137             L now computes a moving standard deviation in addition
2138             to a moving mean.
2139              
2140             =back
2141              
2142              
2143             =head2 2.15, 13-Apr-09
2144              
2145             =over 4
2146              
2147             =item BUG FIX
2148              
2149             Fix a F bug reported by Shalindra Fernando.
2150              
2151             =back
2152              
2153              
2154             =head2 2.16, 14-Apr-09
2155              
2156             =over 4
2157              
2158             =item BUG FIX
2159              
2160             Another minor release bug: on some systems F loses
2161             executable permissions. Again reported by Shalindra Fernando.
2162              
2163             =back
2164              
2165             =head2 2.17, 25-Jun-09
2166              
2167             =over 4
2168              
2169             =item TYPO FIXES
2170              
2171             Typo in the F manual fixed.
2172              
2173             =item IMPROVEMENT
2174              
2175             There is no longer a comment line to label columns
2176             in F, instead the header line is tweaked to
2177             line up. This change restores the Jdb-1.x behavior, and
2178             means that repeated runs of dbcolneaten no longer add comment lines
2179             each time.
2180              
2181             =item BUG FIX
2182              
2183             It turns out F was not correctly handling trailing spaces
2184             when given the C<-E> option to suppress them. This regression is now
2185             fixed.
2186              
2187             =item EXTENSION
2188              
2189             L can now handle direct references to the last row
2190             via F<$lfref>, a dubious but now documented feature.
2191              
2192             =item BUG FIXES
2193              
2194             Separators set with C<-C> in F and F
2195             were not properly
2196             setting the heading, and null fields were not recognized.
2197             The first bug was reported by Martin Lukac.
2198              
2199             =back
2200              
2201             =head2 2.18, 1-Jul-09 A minor release
2202              
2203             =over 4
2204              
2205             =item IMPROVEMENT
2206              
2207             Documentation for F has been improved.
2208              
2209             =item IMPROVEMENT
2210              
2211             The package should now be PGP-signed.
2212              
2213             =back
2214              
2215              
2216             =head2 2.19, 10-Jul-09
2217              
2218             =over 4
2219              
2220             =item BUG FIX
2221              
2222             Internal improvements to debugging output and robustness of
2223             F and F.
2224             F re-enabled.
2225              
2226             =back
2227              
2228              
2229             =head2 2.20, 30-Nov-09
2230             (A collection of minor bugfixes, plus a build against Fedora 12.)
2231              
2232             =over 4
2233              
2234             =item BUG FIX
2235              
2236             Logging for
2237             F
2238             with code refs is now stable
2239             (it no longer includes a hex pointer to the code reference).
2240              
2241             =item BUG FIX
2242              
2243             Better handling of mixed blank lines in F
2244             (see test case F).
2245              
2246             =item BUG FIX
2247              
2248             F now handles multi-line input better,
2249             and handles tables with COLSPAN.
2250              
2251             =item BUG FIX
2252              
2253             F now cleans up threads in an C
2254             to prevent "cannot detach a joined thread" errors that popped
2255             up in perl-5.10. Hopefully this prevents a race condition
2256             that causes the test suites to hang about 20% of the time
2257             (in F).
2258              
2259             =item IMPROVEMENT
2260              
2261             F now detects and correctly fails
2262             when the input and reducer have incompatible
2263             field separators.
2264              
2265             =item IMPROVEMENT
2266              
2267             F, F, F, F,
2268             and F
2269             now all take an C<-F> option to let one specify the output field separator
2270             (so they work better with F).
2271              
2272             =item BUG FIX
2273              
2274             An omitted C<-k> from the manual page of F
2275             is now there. Bug reported by Unkyu Park.
2276              
2277             =back
2278              
2279              
2280             =head2 2.21, 17-Apr-10
2281             bug fix release
2282              
2283             =over 4
2284              
2285             =item BUG FIX
2286              
2287             F now no longer fails with -outputheader => never
2288             (an obscure bug).
2289              
2290             =item IMPROVEMENT
2291              
2292             F (in the warnings section)
2293             and F now more carefully document how they
2294             handle (and do not handle) numerical precision problems,
2295             and other general limits. Thanks to Yuri Pradkin for prompting
2296             this documentation.
2297              
2298             =item IMPROVEMENT
2299              
2300             C
2301             is now restored from C.
2302              
2303             =item IMPROVEMENT
2304              
2305             Documentation for multiple styles of input approaches
2306             (including performance description) added to L.
2307              
2308             =back
2309              
2310             =head2 2.22, 2010-10-31
2311             One new tool F and several bug fixes for Perl 5.10.
2312              
2313             =over 4
2314              
2315             =item BUG FIX
2316              
2317             F now correctly handles n-way merges.
2318             Bug reported by Yuri Pradkin.
2319              
2320             =item INCOMPATIBLE CHANGE
2321              
2322             F now defaults to I padding the last column.
2323              
2324             =item ADDITION
2325              
2326             F now takes B<-N NewColumn> to give the new
2327             column a name other than "count". Feature requested by Mike Rouch
2328             in January 2005.
2329              
2330             =item ADDITION
2331              
2332             New program F copies the last value of a column
2333             into a new column copylast_column of the next row.
2334             New program requested by Fabio Silva;
2335             useful for converting dbmultistats output into dbrvstatdiff input.
2336              
2337             =item BUG FIX
2338              
2339             Several tools (particularly F and F) would
2340             report errors like "Unbalanced string table refcount: (1) for "STDOUT"
2341             during global destruction" on exit, at least on certain versions
2342             of Perl (for me on 5.10.1), but similar errors have been off-and-on
2343             for several Perl releases. Although I think my code looked
2344             OK, I worked around this problem with a different way of handling
2345             standard IO redirection.
2346              
2347             =back
2348              
2349              
2350             =head2 2.23, 2011-03-10
2351             Several small portability bugfixes; improved F for large datasets
2352              
2353             =over 4
2354              
2355             =item IMPROVEMENT
2356              
2357             Documentation to F was changed to use "sd" to refer to
2358             standard deviation, not "ss" (which might be confused with sum-of-squares).
2359              
2360             =item BUG FIX
2361              
2362             This documentation about F was missing the F<-k> option
2363             in some cases.
2364              
2365             =item BUG FIX
2366              
2367             F was failing on MacOS-10.6.3 for some tests with
2368             the error
2369              
2370             dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl)
2371              
2372             The problem seemed to be only in the error, not in operation.
2373             On MacOS, the error is now suppressed.
2374             Thanks to Alefiya Hussain for providing access to a Mac system
2375             that allowed debugging of this problem.
2376              
2377             =item IMPROVEMENT
2378              
2379             The F command requires an external
2380             Perl library (F). On computers that
2381             lack this optional library, previously Fsdb would configure
2382             with a warning and then test cases would fail.
2383             Now those test cases are skipped with an additional warning.
2384              
2385             =item BUG FIX
2386              
2387             The test suite now supports alternative valid output, as a hack
2388             to account for last-digit floating point differences.
2389             (Not very satisfying :-(
2390              
2391             =item BUG FIX
2392              
2393             F output for confidence intervals on very large
2394             datasets has changed. Previously it failed for more than 2^31-1
2395             records, and handling of T-Distributions with thousands of rows
2396             was a bit dubious. Now datasets with more than 10000 are considered
2397             infinitely large and hopefully correctly handled.
2398              
2399             =back
2400              
2401             =head2 2.24, 2011-04-15
2402             Improvements to fix an old bug in dbmapreduce with different field separators
2403              
2404             =over 4
2405              
2406             =item IMPROVEMENT
2407              
2408             The F command had a C<--correct> option to
2409             work around incompatible field-separators,
2410             but it did nothing. Now it does the correct but sad, data-losing
2411             thing.
2412              
2413             =item IMPROVEMENT
2414              
2415             The F command
2416             previously failed with an error message when invoked
2417             on input with a non-default field separator.
2418             The root cause was the underlying F
2419             that did not handle the case of reducers that generated
2420             output with a different field separator than the input.
2421             We now detect and repair incompatible field separators.
2422             This change corrects a problem originally documented and detected
2423             in Fsdb-2.20.
2424             Bug re-reported by Unkyu Park.
2425              
2426             =back
2427              
2428             =head2 2.25, 2011-08-07
2429             Two new tools, F and F, and a bugfix for two people.
2430              
2431             =over 4
2432              
2433             =item IMPROVEMENT
2434              
2435             F now supports a F<--utc> option,
2436             which also fixes this test case for users outside of the Pacific
2437             time zone. Bug reported by David Graff, and also by Peter Desnoyers
2438             (within a week of each other :-)
2439              
2440             =item NEW
2441              
2442             F can convert simple, very regular XML files into Fsdb.
2443              
2444             =item NEW
2445              
2446             F "pivots" a file, converting multiple rows
2447             corresponding to the same entity into a single row with multiple columns.
2448              
2449             =back
2450              
2451             =head2 2.26, 2011-12-12
2452             Bug fixes, particularly for perl-5.14.2.
2453              
2454             =over 4
2455              
2456             =item BUG FIX
2457              
2458             Bugs fixed in L manual page.
2459              
2460             =item BUG FIX
2461              
2462             Fixed problems where L was truncating floating point numbers
2463             when sorting. This strange behavior happens as of perl-5.14.2 and
2464             it I like a Perl bug. I've worked around it for the test suites,
2465             but I'm a bit nervous.
2466              
2467             =back
2468              
2469             =head2 2.27, 2012-11-15
2470             Accumulated bug fixes.
2471              
2472             =over 4
2473              
2474             =item IMPROVEMENT
2475              
2476             F now reports errors in CSV input with real diagnostics.
2477              
2478             =item IMPROVEMENT
2479              
2480             F can now compute median, when given the C<-m> option.
2481              
2482             =item BUG FIX
2483              
2484             F non-numeric handling (the C<-a> option) now works properly.
2485              
2486             =item DOCUMENTATION
2487              
2488             The internal
2489             F test framework
2490             is now documented.
2491              
2492             =item BUG FIX
2493              
2494             F now correctly handles the case where there is no input
2495             (previously it output a blank line, which is a malformed fsdb file).
2496             Thanks to Yuri Pradkin for reporting this bug.
2497              
2498             =back
2499              
2500             =head2 2.28, 2012-11-15
2501             A quick release to fix most rpmlint errors.
2502              
2503             =over 4
2504              
2505             =item BUG FIX
2506              
2507             Fixed a number of minor release problems (wrong permissions, old FSF
2508             address, etc.) found by rpmlint.
2509              
2510             =back
2511              
2512             =head2 2.29, 2012-11-20
2513             a quick release for CPAN testing
2514              
2515             =over 4
2516              
2517             =item IMPROVEMENT
2518              
2519             Tweaked the RPM spec.
2520              
2521             =item IMPROVEMENT
2522              
2523             Modified F to fail gracefully on Perl installations
2524             that lack threads. (Without this fix, I get massive failures
2525             in the non-ithreads test system.)
2526              
2527             =back
2528              
2529             =head2 2.30, 2012-11-25
2530             improvements to perl portability
2531              
2532             =over 4
2533              
2534             =item BUG FIX
2535              
2536             Removed unicode character in documentation of F
2537             so pod tests will pass. (Sigh, that should work :-( )
2538              
2539             =item BUG FIX
2540              
2541             Fixed test suite failures on 5 tests (F
2542             was the first) due to L's addition of a period.
2543             This problem was breaking Fsdb on perl-5.17.
2544             Thanks to Michael McQuaid for helping diagnose this problem.
2545              
2546             =item IMPROVEMENT
2547              
2548             The test suite now prints out the names of tests it tries.
2549              
2550             =back
2551              
2552             =head2 2.31, 2012-11-28
2553             A release with actual improvements to dbfilepivot and dbrowuniq.
2554              
2555             =over 4
2556              
2557             =item BUG FIX
2558              
2559             Documentation fixes: typos in L,
2560             bugs in L,
2561             clarification for comment handling in L.
2562              
2563             =item IMPROVEMENT
2564              
2565             Previously L assumed the input was grouped by keys
2566             and didn't verify that pre-condition.
2567             Now there is no pre-condition (it will sort the input by default),
2568             and it checks if the invariant is violated.
2569              
2570             =item BUG FIX
2571              
2572             Previously L failed if the input had comments (oops :-);
2573             no longer.
2574              
2575             =item IMPROVEMENT
2576              
2577             Now L has the C<-L> option to preserve the last
2578             unique row (instead of the first), a common idiom.
2579              
2580             =back
2581              
2582             =head2 2.32, 2012-12-21
2583             Test suites should now be more numerically robust.
2584              
2585             =over 4
2586              
2587             =item NEW
2588              
2589             New L does fsdb-aware file differencing.
2590             It does not do smart intuition of add/removes like Unix diff(1),
2591             but it does know about columns, and with C<-E>, it does
2592             numeric-aware differences.
2593              
2594             =item IMPROVEMENT
2595              
2596             Test suites that are numeric now use L to do numeric-aware
2597             comparisons, so the test suite should now be robust to slightly different
2598             computers and operating systems and compilers than what I use.
2599              
2600             =back
2601              
2602             =head2 2.33, 2012-12-23
2603             Minor fixes to some test cases.
2604              
2605             =over 4
2606              
2607             =item IMPROVEMENT
2608              
2609             L and L
2610             now support the C<-N> option to give the new column a
2611             different name. (And test cases where this duplication mattered
2612             have been fixed.)
2613              
2614             =item IMPROVEMENT
2615              
2616             L now shows the t-test breakpoint with a reasonable number of
2617             floating point digits.
2618              
2619             =item BUG FIX
2620              
2621             Fixed a numerical stability problem in the F test case.
2622              
2623             =back
2624              
2625             =head1 WHAT'S NEW
2626              
2627             =head2 2.34, 2013-02-10
2628             Parallelism in L.
2629              
2630             =over 4
2631              
2632             =item IMPROVEMENT
2633              
2634             Documentation for L now includes resource requirements.
2635              
2636             =item IMPROVEMENT
2637              
2638             Default memory usage for L is now about 256MB.
2639             (The world keeps moving forward.)
2640              
2641             =item IMPROVEMENT
2642              
2643             L now does merging in parallel.
2644             As a side-effect, L should be faster when
2645             input overflows memory. The level of parallelism
2646             can be limited with the C<--parallelism> option.
2647             (There is more work to do here, but we're off to a start.)
2648              
2649             =back
2650              
2651             =head2 2.35, 2013-02-23
2652             Improvements to dbmerge parallelism
2653              
2654             =over 4
2655              
2656             =item BUG FIX
2657              
2658             Fsdb temporary files are now created more securely (with File::Temp).
2659              
2660             =item IMPROVEMENT
2661              
2662             Programs that sort or merge on fields (L, L, L,
2663             L) now report an error if no fields on which to join or merge
2664             are given.
2665              
2666             =item IMPROVEMENT
2667              
2668             Parallelism in L should now be more consistent,
2669             with less starting and stopping.
2670              
2671             =item IMPROVEMENT
2672             In L, the C<--xargs> option lets one give input filenames on
2673             standard input, rather than the command line.
2674             This feature paves the way for faster dbsort for large inputs
2675             (by pipelining sorting and merging), expected in the next release.
2676              
2677             =back
2678              
2679              
2680             =head2 2.36, 2013-02-25
2681             dbsort pipelines with dbmerge
2682              
2683             =over 4
2684              
2685             =item IMPROVEMENT
2686             For large inputs,
2687             L now pipelines sorting and merging,
2688             allowing earlier processing.
2689              
2690             =item BUG FIX
2691             Since 2.35, L delayed cleanup of intermediate files,
2692             thereby requiring extra disk space.
2693              
2694             =back
2695              
2696             =head2 2.37, 2013-02-26
2697             quick bugfix to support parallel sort and merge from recent releases
2698              
2699             =over 4
2700              
2701             =item BUG FIX
2702             Since 2.35, L delayed removal of input files given by
2703             C<--xargs>. This problem is now fixed.
2704              
2705             =back
2706              
2707              
2708             =head2 2.38, 2013-04-29
2709             minor bug fixes
2710              
2711             =over 4
2712              
2713             =item CLARIFICATION
2714              
2715             Configure now rejects Windows since tests seem to hang
2716             on some versions of Windows.
2717             (I would love help from a Windows developer to get this problem fixed,
2718             but I cannot do it.) See F.
2719              
2720             =item IMPROVEMENT
2721              
2722             All programs that use temporary files
2723             (L, L, L, L)
2724             now take the C<-T> option
2725             and set the temporary directory consistently.
2726              
2727             In addition, error messages are better when the temporary directory
2728             has problems. Problem reported by Liang Zhu.
2729              
2730             =item BUG FIX
2731              
2732             L was failing with external, map-reduce aware reducers
2733             (when invoked with -M and an external program).
2734             (Sigh, did this case ever work?)
2735             This case should now work.
2736             Thanks to Yuri Pradkin for reporting this bug (in 2011).
2737              
2738             =item BUG FIX
2739              
2740             Fixed perl-5.10 problem with L.
2741             Thanks to Yuri Pradkin for reporting this bug (in 2013).
2742              
2743             =back
2744              
2745             =head2 2.39, 2013-05-31
2746             quick release for the dbrowuniq extension
2747              
2748             =over 4
2749              
2750             =item BUG FIX
2751              
2752             Actually in 2.38, the Fedora F<.spec> got cleaner dependencies.
2753             Suggestion from Christopher Meng via L.
2754              
2755             =item ENHANCEMENT
2756              
2757             Fsdb files are now explicitly set into UTF-8 encoding,
2758             unless one specifies C<-encoding> to C.
2759              
2760             =item ENHANCEMENT
2761              
2762             L now supports C<-I> for incremental counting.
2763              
2764             =back
2765              
2766             =head2 2.40, 2013-07-13
2767             small bug fixes
2768              
2769             =over 4
2770              
2771             =item BUG FIX
2772              
2773             L now has more respect for a user-given temporary directory;
2774             it no longer is ignored for merging.
2775              
2776             =item IMPROVEMENT
2777              
2778             L now has options to output the first, last, and both first
2779             and last rows of a run (C<-F>, C<-L>, and C<-B>).
2780              
2781             =item BUG FIX
2782              
2783             L now correctly handles C<-N>. Sigh, it didn't work before.
2784              
2785             =back
2786              
2787             =head2 2.41, 2013-07-29
2788             small bug and packaging fixes
2789              
2790             =over 4
2791              
2792             =item ENHANCEMENT
2793              
2794             Documentation to L improved
2795             (inspired by questions from Qian Kun).
2796              
2797             =item BUG FIX
2798              
2799             L no longer duplicates
2800             singleton unique lines when outputting both (with C<-B>).
2801              
2802             =item BUG FIX
2803              
2804             Add missing C dependency to F.
2805              
2806             =item ENHANCEMENT
2807              
2808             Tests now show the diff of the failing output
2809             if run with C.
2810              
2811             =item ENHANCEMENT
2812              
2813             L now includes documentation for how to output extra rows.
2814             Suggestion from Yuri Pradkin.
2815              
2816             =item BUG FIX
2817              
2818             Several improvements to the Fedora package
2819             from Michael Schwendt
2820             via L,
2821             and from the harsh master that is F.
2822             (I am stymied at teaching it that "outliers" is spelled correctly.
2823             Maybe I should send it Schneier's book. And an unresolvable
2824             invalid-spec-name lurks in the SRPM.)
2825              
2826             =back
2827              
2828             =head2 2.42, 2013-07-31
2829             A bug fix and packaging release.
2830              
2831             =over 4
2832              
2833             =item ENHANCEMENT
2834              
2835             Documentation to L improved
2836             to better describe memory usage.
2837             (Based on problem report by Lin Quan.)
2838              
2839             =item BUG FIX
2840              
2841             The F<.spec> is now F
2842             to satisfy F.
2843             Thanks to Christopher Meng for a specific bug report.
2844              
2845             =item BUG FIX
2846              
2847             Test F no longer has a column
2848             that caused failures because of numerical instability.
2849              
2850             =item BUG FIX
2851              
2852             Some tests now better handle bugs in old versions of perl (5.10, 5.12).
2853             Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12,
2854             but the fix should affect other platforms.
2855              
2856             =back
2857              
2858             =head2 2.43, 2013-08-27
2859             Adds in-file compression.
2860              
2861             =over 4
2862              
2863             =item BUG FIX
2864              
2865             Changed the sort on F to strings
2866             (from numerics) so we're less susceptible to false test-failures
2867             due to floating point IO differences.
2868              
2869             =item EXPERIMENTAL ENHANCEMENT
2870              
2871             Yet more parallelism in L:
2872             new "endgame-mode" builds a merge tree of processes at the end
2873             of large merge tasks to get maximal parallelism.
2874             Currently this feature is off by default
2875             because it can hang for some inputs.
2876             Enable this experimental feature with C<--endgame>.
2877              
2878             =item ENHANCEMENT
2879              
2880             C now handles being given C objects
2881             (as exercised by L).
2882              
2883             =item BUG FIX
2884              
2885             Handling of NamedTmpfiles now supports concurrency.
2886             This fix will hopefully fix occasional
2887             "Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93."
2888             errors.
2889              
2890             =item BUG FIX
2891              
2892             Fsdb now requires perl 5.10.
2893             This is a bug fix because some test cases used to require it,
2894             but this fact was not properly documented.
2895             (Back-porting to 5.008 would require removing all C operators.)
2896              
2897             =item ENHANCEMENT
2898              
2899             Fsdb now handles automatic compression of file contents.
2900             Enable compression with C
2901             (or C or C).
2902             All programs should operate on compressed files
2903             and leave the output with the same level of compression.
2904             C is recommended as fastest and most efficient.
2905             C produces unrepeatable output (and so has no
2906             output test), it seems to insist on adding a timestamp.
2907              
2908             =back
2909              
2910             =head2 2.44, 2013-10-02
2911             A major change--all threads are gone.
2912              
2913             =over 4
2914              
2915             =item ENHANCEMENT
2916              
2917             Fsdb is now thread free and only uses processes for parallelism.
2918             This change is a big change--the entire motivation for Fsdb-2
2919             was to exploit parallelism via threading.
2920             Parallelism--good, but perl threading--bad for performance.
2921             Horribly bad for performance.
2922             About 20x worse than pipes on my box.
2923             (See perl bug #119445 for the discussion.)
2924              
2925             =item NEW
2926              
2927             C provides a thread-like abstraction over forking,
2928             with some nice support for callbacks in the parent upon child termination.
2929              
2930             =item ENHANCEMENT
2931              
2932             Details about removing threads:
2933             C is thread free,
2934             and new tests to verify each of its parts.
2935             The easy cases are C,
2936             C, C, C, and
2937             C, each of which use it in simple ways (2013-09-09).
2938             C is now thread free (2013-09-13),
2939             but was a significant rewrite,
2940             which brought C along.
2941             C is partly thread free (2013-09-21),
2942             again as a rewrite,
2943             and it brings C along.
2944             Full C support took much longer (2013-10-02).
2945              
2946             =item BUG FIX
2947              
2948             When running with user-only output (C<-n>),
2949             L now resets the output vector C<$ofref>
2950             after it has been output.
2951              
2952             =item NEW
2953              
2954             L will create all columns at the head of each row
2955             with the C<--first> option.
2956              
2957             =item NEW
2958              
2959             L will concatenate two files,
2960             verifying that they have the same schema.
2961              
2962             =item ENHANCEMENT
2963              
2964             L now passes comments through,
2965             rather than eating them as before.
2966              
2967             Also, L now supports a C<--> option to prevent misinterpreting
2968             sub-program parameters as for dbmapreduce.
2969              
2970             =item INCOMPATIBLE CHANGE
2971              
2972             L no longer figures out if it needs to add the key
2973             to the output. For multi-key-aware reducers, it never does
2974             (and cannot). For non-multi-key-aware reducers,
2975             it defaults to add the key and will now fail if the reducer adds the key
2976             (with error "dbcolcreate: attempt to create pre-existing column...").
2977             In such cases, one must disable adding the key with the new
2978             option C<--no-prepend-key>.
2979              
2980             =item INCOMPATIBLE CHANGE
2981              
2982             L no longer copies the input field separator by default.
2983             For multi-key-aware reducers, it never does
2984             (and cannot). For non-multi-key-aware reducers,
2985             it defaults to I<not> copying the field separator,
2986             but it will copy it (the old default) with the C<--copy-fs> option.
2987              
2988             =back
2989              
2990             =head2 2.45, 2013-10-07
2991             cleanup from de-thread-ification
2992              
2993             =over 4
2994              
2995             =item BUG FIX
2996              
2997             Corrected a fast busy-wait in L.
2998              
2999             =item ENHANCEMENT
3000              
3001             Endgame mode enabled in L; it (and also large cases of L)
3002             should now exploit greater parallelism.
3003              
3004             =item BUG FIX
3005              
3006             Test case with C (gone since 2.44) now removed.
3007              
3008             =back
3009              
3010             =head2 2.46, 2013-10-08
3011             continuing cleanup of our no-threads version
3012              
3013             =over 4
3014              
3015             =item BUG FIX
3016              
3017             Fixed some packaging details.
3018             (Really, threads are no longer required,
3019             missing tests in the MANIFEST.)
3020              
3021             =item IMPROVEMENT
3022              
3023             L now better communicates with the merge process to avoid
3024             bursty parallelism.
3025              
3026             L now can take C<-autoflush => 1>
3027             for line-buffered IO.
3028              
3029             =back
3030              
3031             =head2 2.47, 2013-10-12
3032             test suite cleanup for non-threaded perls
3033              
3034             =over 4
3035              
3036             =item BUG FIX
3037              
3038             Removed some stray "use threads" in some test cases.
3039             We didn't need them, and these were breaking non-threaded perls.
3040              
3041             =item BUG FIX
3042              
3043             Better handling of Fred cleanup;
3044             should fix intermittent L failures on BSD.
3045              
3046             =item ENHANCEMENT
3047              
3048             Improved test framework to show output when tests fail.
3049             (This time, for real.)
3050              
3051             =back
3052              
3053             =head2 2.48, 2014-01-03
3054             small bugfixes and improved release engineering
3055              
3056             =over 4
3057              
3058             =item ENHANCEMENT
3059              
3060             Test suites now skip tests for libraries that are missing.
3061             (Patch for missing C contributed by Calvin Ardi.)
3062              
3063             =item ENHANCEMENT
3064              
3065             Removed references to Jdb in the package specification.
3066             Since the name was changed in 2008, there's no longer a huge
3067             need for backwards compatibility.
3068             (Suggestion from Petr Šabata.)
3069              
3070             =item ENHANCEMENT
3071              
3072             Test suites now invoke the perl using the path from C<$Config{perlpath}>.
3073             Hopefully this helps testing in environments where there are multiple installed
3074             perls and the default perl is not the same as the perl-under-test
3075             (as happens in cpantesters.org).
3076              
3077             =item BUG FIX
3078              
3079             Added specific encoding to this manpage to account for
3080             Unicode. Required to build correctly against perl-5.18.
3081              
3082             =back
3083              
3084             =head2 2.49, 2014-01-04
3085             bugfix to unicode handling in Fsdb IO (plus minor packaging fixes)
3086              
3087             =over 4
3088              
3089             =item BUG FIX
3090              
3091             Restored a line in the F<.spec> to chmod g-s.
3092              
3093             =item BUG FIX
3094              
3095             Unicode decoding is now handled correctly for programs that read
3096             from standard input.
3097             (Also: New test scripts cover unicode input and output.)
3098              
3099             =item BUG FIX
3100              
3101             Fix to L documentation encoding line.
3102             Addresses test failure in perl-5.16 and earlier.
3103             (Who knew "encoding" had to be followed by a blank line.)
3104              
3105             =back
3106              
3107             =head1 WHAT'S NEW
3108              
3109             =head2 2.50, 2014-05-27
3110             a quick release for spec tweaks
3111              
3112             =over 4
3113              
3114             =item ENHANCEMENT
3115              
3116             In L, the C<-N> (no output, even comments) option now
3117             implies C<-n>, and it now suppresses the header and trailer.
3118              
3119             =item BUG FIX
3120              
3121             A few more tweaks to the F from Petr Šabata.
3122              
3123             =item BUG FIX
3124              
3125             Fixed 3 uses of C in test suites that were causing test
3126             failures (due to warnings, not real failures) on some platforms.
3127              
3128             =back
3129              
3130             =head2 2.51, 2014-09-05
3131             Feature enhancements to L, L, L, and new L
3132              
3133             =over 4
3134              
3135             =item ENHANCEMENT
3136              
3137             L now has a C<--no-recreate-fatal>
3138             that causes it to ignore creation of existing columns
3139             (instead of failing).
3140              
3141             =item ENHANCEMENT
3142              
3143             L once again is robust to reducers
3144             that output the key;
3145             C<--no-prepend-key> is no longer mandatory.
3146              
3147             =item ENHANCEMENT
3148              
3149             L can now enumerate the output rows with C<-E>.
3150              
3151             =item BUG FIX
3152              
3153             L is more mathematically robust.
3154             Previously for some inputs and some platforms,
3155             floating point rounding could
3156             sometimes cause square roots of negative numbers.
3157              
3158             =item NEW
3159              
3160             L converts the output of the MySQL or MariaDB
3161             select command into fsdb format.
3162              
3163             =item INCOMPATIBLE CHANGE
3164              
3165             L now outputs the I row
3166             when doing sloppy numeric comparisons,
3167             to better support test suites.
3168              
3169             =back
3170              
3171             =head2 2.52, 2014-11-03
3172             Fixing the test suite for line number changes.
3173              
3174             =over 4
3175              
3176             =item ENHANCEMENT
3177              
3178             Test suites changed to be robust to exact line numbers of failures,
3179             since different Perl releases fail on different lines.
3180             L
3181              
3182             =back
3183              
3184              
3185             =head2 2.53, 2014-11-26
3186             bug fixes and stability improvements to dbmapreduce
3187              
3188             =over 4
3189              
3190             =item ENHANCEMENT
3191              
3192             The L now supports a C<--quiet> option.
3193              
3194             =item ENHANCEMENT
3195              
3196             Better documentation of L.
3197              
3198             =item BUGFIX
3199              
3200             Added groff-base and perl-podlators to the Fedora package spec.
3201             Fixes L.
3202             (Also in package 2.52-2.)
3203              
3204             =item BUGFIX
3205              
3206             An important stability improvement to L.
3207             It, plus L, and L now support
3208             controlled parallelism with the C<--parallelism=N> option.
3209             They default to run with the number of available CPUs.
3210             L also moderates its level of parallelism.
3211             Previously it would create reducers as needed,
3212             causing CPU thrashing if reducers ran much slower than data production.
3213              
3214             =item BUGFIX
3215              
3216             The combination of L with L now works
3217             as it should. (The obscure bug was an interaction with L
3218             with non-multi-key reducers that output their own key. L
3219             has too many useful corner cases.)
3220              
3221             =back
3222              
3223             =head2 2.54, 2014-11-28
3224             fix for the test suite to correct failing tests on not-my-platform
3225              
3226             =over 4
3227              
3228             =item BUGFIX
3229              
3230             Sigh, the test suite now has a test suite.
3231             Because, yes, I broke it, causing many incorrect failures
3232             at cpantesters.
3233             Now fixed.
3234              
3235             =back
3236              
3237             =head2 2.55, 2015-01-05
3238             many spelling fixes and L tests are more robust to different numeric precision
3239              
3240             =over 4
3241              
3242             =item ENHANCEMENT
3243              
3244             L now can be extra quiet, as I continue to try to track down
3245             a numeric difference on FreeBSD AMD boxes.
3246              
3247             =item ENHANCEMENT
3248              
3249             L gave different test output
3250             (just reflecting rounding error)
3251             when stddev approaches zero. We now detect and handle this case.
3252             See
3253             and thanks to H. Merijn Brand for the bug report.
3254              
3255             =item BUG FIX
3256              
3257             Many, many spelling bugs found by
3258             H. Merijn Brand; thanks for the bug report.
3259              
3260             =item INCOMPATIBLE CHANGE
3261              
3262             A number of programs had misspelled "separator"
3263             in C<--fieldseparator> and C<--columnseparator> options as "seperator".
3264             These are now correctly spelled.
3265              
3266             =back
3267              
3268             =head2 2.56, 2015-02-03
3269             fix against Getopt::Long-2.43's stricter error checking
3270              
3271             =over 4
3272              
3273             =item BUG FIX
3274              
3275             Internal argument parsing uses Getopt::Long, but mixed pass-through and E<lt>E<gt>.
3276             Bug reported by Petr Pisar at L.
3277              
3278             =item BUG FIX
3279              
3280             Added missing BuildRequires for C.
3281              
3282             =back
3283              
3284             =head2 2.57, 2015-04-29
3285             Minor changes, with better performance from L.
3286              
3287             =over 4
3288              
3289             =item BUG FIX
3290              
3291             L now honors C<--remove-inputs> (previously it didn't).
3292             This omission meant that L (and L) would accumulate
3293             files in F when running. Bad news for inputs with 4M keys.
3294              
3295             =item ENHANCEMENT
3296              
3297             L should be faster with lots of small keys.
3298             L now supports C<-k> to get some of the functionality of
3299             L (if data is pre-sorted and median/quartiles are not required).
3300              
3301             L now honors C<--remove-inputs> (previously it didn't).
3302             This omission meant that L (and L) would accumulate
3303             files in F when running. Bad news for inputs with 4M keys.
3304              
3305             =back
3306              
3307              
3308             =head2 2.58, 2015-04-30
3309             Bugfix in L
3310              
3311             =over 4
3312              
3313             =item BUG FIX
3314              
3315             Fixed a case where L suffered mojibake in endgame mode.
3316             This bug surfaced when L was applied to large files
3317             (big enough to require merging) with unicode in them;
3318             the symptom was something like:
3319             Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111.
3320              
3321             =back
3322              
3323              
3324             =head2 2.59, 2016-09-01
3325             Collect a few small bug fixes and documentation improvements.
3326              
3327             =over 4
3328              
3329             =item BUG FIX
3330              
3331             More IO is explicitly marked UTF-8 to avoid Perl's tendency to
3332             mojibake on otherwise valid unicode input.
3333             This change helps L.
3334              
3335             =item ENHANCEMENT
3336              
3337             L now cross-references L.
3338              
3339             =item ENHANCEMENT
3340              
3341             Documentation for L now clarifies that the default is baseline mode.
3342              
3343             =item BUG FIX
3344              
3345             L now propagates C<-T> into the sorting process (if it is required).
3346             Thanks to Lan Wei for reporting this bug.
3347              
3348             =back
3349              
3350              
3351             =head2 2.60, 2016-09-04
3352             Adds support for hash joins.
3353              
3354             =over 4
3355              
3356             =item ENHANCEMENT
3357              
3358             L now supports hash joins
3359             with C<-t lefthash> and C<-t righthash>.
3360             Hash joins cache a table in memory, but do not require
3361             that the other table be sorted.
3362             They are ideal when joining a large table against a small one.
3363              
3364             =back
3365              
3366             =head2 2.61, 2016-09-05
3367             Support left and right outer joins.
3368              
3369             =over 4
3370              
3371             =item ENHANCEMENT
3372              
3373             L now handles left and right outer joins
3374             with C<-t left> and C<-t right>.
3375              
3376             =item ENHANCEMENT
3377              
3378             L hash joins are now selected
3379             with C<-m lefthash> and C<-m righthash>
3380             (not the shortlived C<-t righthash> option).
3381             (Technically this change is incompatible with Fsdb-2.60, but
3382             no one but me ever used that version.)
3383              
3384             =back
3385              
3386             =head1 AUTHOR
3387              
3388             John Heidemann, C
3389              
3390             See L for the many people who have contributed
3391             bug reports and fixes.
3392              
3393              
3394             =head1 COPYRIGHT
3395              
3396             Fsdb is Copyright (C) 1991-2016 by John Heidemann .
3397              
3398             This program is free software; you can redistribute it and/or modify
3399             it under the terms of version 2 of the GNU General Public License as
3400             published by the Free Software Foundation.
3401              
3402             This program is distributed in the hope that it will be useful, but
3403             WITHOUT ANY WARRANTY; without even the implied warranty of
3404             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3405             General Public License for more details.
3406              
3407             You should have received a copy of the GNU General Public License
3408             along with this program; if not, write to the Free Software
3409             Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
3410              
3411             A copy of the GNU General Public License can be found in the file
3412             ``COPYING''.
3413              
3414              
3415              
3416             =head1 COMMENTS and BUG REPORTS
3417              
3418             Any comments about these programs should be sent to John Heidemann
3419             C.
3420              
3421              
3422             =cut
3423              
3424             1; # End of Fsdb
3425              
3426             # LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg
3427             # LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un
3428             # LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw
3429             # LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize
3430             # LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct
3431             # LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR
3432             # LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss
3433             # LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff
3434             # LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb
3435             # LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley
3436             # LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl
3437             # LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar
3438             # LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed
3439             # LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah
3440             # LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt
3441             # LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit
3442             # LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh
3443             # LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre
3444             # LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr
3445             # LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz
3446             # LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO
3447             # LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen
3448             # LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw
3449             # LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount
3450             # LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho
3451             # LocalWords: alfred david clark constantine debrorah Fsdb's colized listized
3452             # LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa
3453             # LocalWords: refcount lossage DaGronk dbcolscorellate ipchain