File Coverage

blib/lib/Fsdb.pm
Criterion Covered Total %
statement 9 9 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 12 12 100.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # Fsdb.pm
5             #
6             # Copyright (C) 1991-2016 by John Heidemann
7             #
8             # This program is free software; you can redistribute it and/or
9             # modify it under the terms of the GNU General Public License,
10             # version 2, as published by the Free Software Foundation.
11             #
12             # This program is distributed in the hope that it will be useful,
13             # but WITHOUT ANY WARRANTY; without even the implied warranty of
14             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15             # GNU General Public License for more details.
16             #
17             # You should have received a copy of the GNU General Public License along
18             # with this program; if not, write to the Free Software Foundation, Inc.,
19             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20             #
21              
22             package Fsdb;
23              
24 2     2   141648 use warnings;
  2         2  
  2         60  
25 2     2   6 use strict;
  2         2  
  2         31  
26 2     2   995 use utf8;
  2         18  
  2         8  
27              
28             =encoding utf8
29              
30             =head1 NAME
31              
32             Fsdb - a flat-text database for shell scripting
33              
34              
35             =cut
36             our $VERSION = '2.63';
37              
38             =head1 SYNOPSIS
39              
40             Fsdb, the flatfile streaming database, is a package of commands
41             for manipulating flat-ASCII databases from
42             shell scripts. Fsdb is useful to process medium amounts of data (with
43             very little data you'd do it by hand, with megabytes you might want a
44             real database).
45             Fsdb was known as Jdb from 1991 to Oct. 2008.
46              
47             Fsdb is very good at doing things like:
48              
49             =over 4
50              
51             =item *
52              
53             extracting measurements from experimental output
54              
55             =item *
56              
57             examining data to address different hypotheses
58              
59             =item *
60              
61             joining data from different experiments
62              
63             =item *
64              
65             eliminating/detecting outliers
66              
67             =item *
68              
69             computing statistics on data
70             (mean, confidence intervals, correlations, histograms)
71              
72             =item *
73              
74             reformatting data for graphing programs
75              
76             =back
77              
78             Fsdb is built around the idea of a flat text file as a database.
79             Fsdb files (by convention, with the extension F<.fsdb>),
80             have a header documenting the schema (what the columns mean),
81             and then each line represents a database record (or row).
82              
83             For example:
84              
85             #fsdb experiment duration
86             ufs_mab_sys 37.2
87             ufs_mab_sys 37.3
88             ufs_rcp_real 264.5
89             ufs_rcp_real 277.9
90              
91             Is a simple file with four experiments (the rows),
92             each with a description and run time
93             in the first and second columns.
94              
95             Rather than hand-code scripts to do each special case, Fsdb provides
96             higher-level functions. Although it's often easy to throw together a
97             custom script to do any single task, I believe that there are several
98             advantages to using Fsdb:
99              
100             =over 4
101              
102             =item *
103              
104             these programs provide a higher level interface than plain Perl, so
105              
106             =over 4
107              
108             =item **
109              
110             Fewer lines of simpler code:
111              
112             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
113              
114             Picks out just one type of experiment and computes statistics on it,
115             rather than:
116              
117             while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; }
118             $mean = $sum / $n; $std_dev = ...
119              
120             in dozens of places.
121              
122             =back
123              
124             =item *
125              
126             the library uses names for columns, so
127              
128             =over 4
129              
130             =item **
131              
132             No more C<$F[1]>, use C<_duration>.
133              
134             =item **
135              
136             New or different order columns? No changes to your scripts!
137              
138             =back
139              
140             Thus if your experiment gets more complicated with a size parameter,
141             so your log changes to:
142              
143             #fsdb experiment size duration
144             ufs_mab_sys 1024 37.2
145             ufs_mab_sys 1024 37.3
146             ufs_rcp_real 1024 264.5
147             ufs_rcp_real 1024 277.9
148             ufs_mab_sys 2048 45.3
149             ufs_mab_sys 2048 44.2
150              
151             Then the previous scripts still work, even though duration is
152             now the third column, not the second.
153              
154             =item *
155              
156             A series of actions are self-documenting (each program records what it does).
157              
158             =over 4
159              
160             =item **
161              
162             No more wondering what hacks were used to compute the
163             final data, just look at the comments at the end
164             of the output.
165              
166             =back
167              
168             For example, the commands
169              
170             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
171              
172             add to the end of the output the lines
173             # | dbrow _experiment eq "ufs_mab_sys"
174             # | dbcolstats duration
175              
176              
177             =item *
178              
179             The library is mature, supporting large datasets,
180             corner cases, error handling, backed by an automated test suite.
181              
182             =over 4
183              
184             =item **
185              
186             No more puzzling about bad output because your custom script
187             skimped on error checking.
188              
189             =item **
190              
191             No more memory thrashing when you try to sort ten million records.
192              
193             =back
194              
195             =item *
196              
197             Fsdb-2.x supports Perl scripting (in addition to shell scripting),
198             with libraries to do Fsdb input and output, and easy support for pipelines.
199             The shell script
200              
201             dbcol name test1 | dbroweval '_test1 += 5;'
202              
203             can be written in perl as:
204              
205             dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;'));
206              
207             =back
208              
209             (The disadvantage is that you need to learn what functions Fsdb provides.)
210              
211             Fsdb is built on flat-ASCII databases. By storing data in simple text
212             files and processing it with pipelines it is easy to experiment (in
213             the shell) and look at the output.
214             To the best of my knowledge, the original implementation of
215             this idea was C</rdb>, a commercial product described in the book
216             I<UNIX relational database management: application development in the UNIX environment>
217             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and
218             also at the web page L). Fsdb is an incompatible
219             re-implementation of their idea without any accelerated indexing or
220             forms support. (But it's free, and probably has better statistics!).
221              
222             Fsdb-2.x will exploit multiple processors or cores,
223             and provides Perl-level support for input, output, and threaded-pipelines.
224             (As of Fsdb-2.44 it no longer uses Perl threading, just processes.)
225              
226             Installation instructions follow at the end of this document.
227             Fsdb-2.x requires Perl 5.8 to run.
228             All commands have manual pages and provide usage with the C<--help> option.
229             All commands are backed by an automated test suite.
230              
231             The most recent version of Fsdb is available on the web at
232             L.
233              
234              
235             =head1 WHAT'S NEW
236              
237             =head2 2.63, 2017-02-03
238             Re-add some features supposedly in 2.62 but not, and add more --header options.
239              
240             =over 4
241              
242             =item ENHANCEMENT
243              
244             The option B<-j> is now a synonym for B<--parallelism>.
245             (And several documentation bugs about this option are fixed.)
246              
247             =item ENHANCEMENT
248              
249             Additional support for C<--header> in L, L, L,
250             and L.
251              
252             =item BUG FIX
253              
254             Version 2.62 was supposed to have this improvement, but did not (and now does):
255             L now allows the B<--possible-pivots> option,
256             and if it is provided processes the data in one pass.
257              
258             =item BUG FIX
259              
260             Version 2.62 was supposed to have this improvement, but did not (and now does):
261             L logs are now quoted.
262              
263             =back
264              
265              
266              
267             =head1 README CONTENTS
268              
269             =over 4
270              
271             =item executive summary
272              
273             =item what's new
274              
275             =item README CONTENTS
276              
277             =item installation
278              
279             =item basic data format
280              
281             =item basic data manipulation
282              
283             =item list of commands
284              
285             =item another example
286              
287             =item a gradebook example
288              
289             =item a password example
290              
291             =item history
292              
293             =item related work
294              
295             =item release notes
296              
297             =item copyright
298              
299             =item comments
300              
301             =back
302              
303              
304             =head1 INSTALLATION
305              
306             Fsdb now uses the standard Perl build and installation from
307             ExtUtils::MakeMaker(3), so the quick answer to installation is to type:
308            
309             perl Makefile.PL
310             make
311             make test
312             make install
313              
314             Or, if you want to install it somewhere else, change the first line to
315              
316             perl Makefile.PL PREFIX=$HOME
317              
318             and it will go in your home directory's F, etc.
319             (See L for more details.)
320              
321             Fsdb requires perl 5.8 or later.
322              
323             A test-suite is available, run it with
324              
325             make test
326              
327             A FreeBSD port to Fsdb is available, see
328             L.
329              
330             A Fink (MacOS X) port is available, see
331             L.
332             (Thanks to Lars Eggert for maintaining this port.)
333              
334              
335             =head1 BASIC DATA FORMAT
336              
337             These programs are based on the idea of storing data in simple ASCII
338             files. A database is a file with one header line and then data or
339             comment lines. For example:
340              
341             #fsdb account passwd uid gid fullname homedir shell
342             johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
343             greg * 2275 134 Greg_Johnson /home/greg /bin/bash
344             root * 0 0 Root /root /bin/bash
345             # this is a simple database
346              
347             The header line must be first and begins with C<#fsdb>.
348             There are rows (records) and columns (fields),
349             just like in a normal database.
350             Comment lines begin with C<#>.
351             Column names are any string not containing spaces or single quotes
352             (although it is prudent to keep them alphanumeric with underscore).
353              
354             By default, columns are delimited by whitespace.
355             With this default configuration, the contents of a field
356             cannot contain whitespace.
357             However, this limitation can be relaxed by changing the field separator
358             as described below.
359              
360             The big advantage of simple flat-text databases is that
361             it is usually easy to massage data into this format,
362             and it's reasonably easy to take data out of this
363             format into other (text-based) programs, like gnuplot, jgraph, and
364             LaTeX. Think Unix. Think pipes.
365             (Or even output to Excel and HTML if you prefer.)
366              
367             Since no-whitespace in columns was a problem for some applications,
368             there's an option which relaxes this rule. You can specify the field
369             separator in the table header with C<-F x> where C<x> is
370             a code for the new field separator.
371             A full list of codes is at L,
372             but two common special values are C<-F t>
373             which is a separator of a single tab character,
374             and C<-F S>, a separator of two spaces.
375             Both allow (single) spaces in fields. An example:
376              
377             #fsdb -F S account passwd uid gid fullname homedir shell
378             johnh * 2274 134 John Heidemann /home/johnh /bin/bash
379             greg * 2275 134 Greg Johnson /home/greg /bin/bash
380             root * 0 0 Root /root /bin/bash
381             # this is a simple database
382              
383             See L for more details. Regardless of what the column
384             separator is for the body of the data, it's always whitespace in the
385             header.
386              
387             There's also a third format: a "list". Because it's often hard to see
388             what's in columns past the first two, in list format each "column" is on
389             a separate line. The programs dblistize and dbcolize convert to and
390             from this format, and all programs work with either format.
391             The command
392              
393             dbfilealter -R C < DATA/passwd.fsdb
394              
395             outputs:
396              
397             #fsdb -R C account passwd uid gid fullname homedir shell
398             account: johnh
399             passwd: *
400             uid: 2274
401             gid: 134
402             fullname: John_Heidemann
403             homedir: /home/johnh
404             shell: /bin/bash
405            
406             account: greg
407             passwd: *
408             uid: 2275
409             gid: 134
410             fullname: Greg_Johnson
411             homedir: /home/greg
412             shell: /bin/bash
413            
414             account: root
415             passwd: *
416             uid: 0
417             gid: 0
418             fullname: Root
419             homedir: /root
420             shell: /bin/bash
421            
422             # this is a simple database
423             # | dblistize
424              
425             See L for more details.
426              
427              
428             =head1 BASIC DATA MANIPULATION
429              
430             A number of programs exist to manipulate databases.
431             Complex functions can be made by stringing together commands
432             with shell pipelines. For example, to print the home
433             directories of everyone with ``john'' in their names,
434             you would do:
435              
436             cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir
437              
438             The output might be:
439              
440             #fsdb homedir
441             /home/johnh
442             /home/greg
443             # this is a simple database
444             # | dbrow _fullname =~ /John/
445             # | dbcol homedir
446              
447             (Notice that comments are appended to the output listing each command,
448             providing an automatic audit log.)
449              
450             In addition to typical database functions (select, join, etc.) there
451             are also a number of statistical functions.
452              
453             The real power of Fsdb is that one can apply arbitrary code to rows
454             to do powerful things.
455              
456             cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/'
457              
458             converts "John_Heidemann" into "Heidemann,_John".
459             Not too much more work could split fullname into firstname and lastname
460             fields.
461              
462              
463             =head1 TALKING ABOUT COLUMNS
464              
465             An advantage of Fsdb is that you can talk about columns by name
466             (symbolically) rather than simply by their positions. So in the above
467             example, C<dbcol homedir> pulled out the home directory column, and
468             C<dbrow '_fullname =~ /John/'> matched against column fullname.
469              
470             In general, you can use the name of the column listed on the C<#fsdb> line
471             to identify it in most programs, and _name to identify it in code.
472              
473             Some alternatives for flexibility:
474              
475             =over 4
476              
477             =item *
478              
479             Numeric values identify columns positionally, numbering from 0.
480             So 0 or _0 is the first column, 1 is the second, etc.
481              
482             =item *
483              
484             In code, _last_columnname gets the value from columnname's previous row.
485              
486             =back
487              
488             See L for more details about writing code.
489              
490              
491              
492             =head1 LIST OF COMMANDS
493              
494             Enough said. I'll summarize the commands, and then you can
495             experiment. For a detailed description of each command, see a summary
496             by running it with the argument C<--help> (or C<-?> if you prefer.)
497             Full manual pages can be found by running the command
498             with the argument C<--man>, or running the Unix command C<man dbcol>
499             or whatever program you want.
500              
501             =head2 TABLE CREATION
502              
503             =over 4
504              
505             =item dbcolcreate
506              
507             add columns to a database
508              
509             =item dbcoldefine
510              
511             set the column headings for a non-Fsdb file
512              
513             =back
514              
515             =head2 TABLE MANIPULATION
516              
517             =over 4
518              
519             =item dbcol
520              
521             select columns from a table
522              
523             =item dbrow
524              
525             select rows from a table
526              
527             =item dbsort
528              
529             sort rows based on a set of columns
530              
531             =item dbjoin
532              
533             compute the natural join of two tables
534              
535             =item dbcolrename
536              
537             rename a column
538              
539             =item dbcolmerge
540              
541             merge two columns into one
542              
543             =item dbcolsplittocols
544              
545             split one column into two or more columns
546              
547             =item dbcolsplittorows
548              
549             split one column into multiple rows
550              
551             =item dbfilepivot
552              
553             "pivots" a file, converting multiple rows
554             corresponding to the same entity into a single row with multiple columns.
555              
556             =item dbfilevalidate
557              
558             check that db file doesn't have some common errors
559              
560             =back
561              
562             =head2 COMPUTATION AND STATISTICS
563              
564             =over 4
565              
566             =item dbcolstats
567              
568             compute statistics over a column (mean,etc.,optionally median)
569              
570             =item dbmultistats
571              
572             group rows by some key value, then compute stats (mean, etc.) over each group
573             (equivalent to dbmapreduce with dbcolstats as the reducer)
574              
575             =item dbmapreduce
576              
577             group rows (map) and then apply an arbitrary function to each group (reduce)
578              
579             =item dbrvstatdiff
580              
581             compare two sample distributions (mean/conf interval/T-test)
582              
583             =item dbcolmovingstats
584              
585             computing moving statistics over a column of data
586              
587             =item dbcolstatscores
588              
589             compute Z-scores and T-scores over one column of data
590              
591             =item dbcolpercentile
592              
593             compute the rank or percentile of a column
594              
595             =item dbcolhisto
596              
597             compute histograms over a column of data
598              
599             =item dbcolscorrelate
600              
601             compute the coefficient of correlation over several columns
602              
603             =item dbcolsregression
604              
605             compute linear regression and correlation for two columns
606              
607             =item dbrowaccumulate
608              
609             compute a running sum over a column of data
610              
611             =item dbrowcount
612              
613             count the number of rows (a subset of dbstats)
614              
615             =item dbrowdiff
616              
617             compute differences of a column across the rows of a table
618              
619             =item dbrowenumerate
620              
621             number each row
622              
623             =item dbroweval
624              
625             run arbitrary Perl code on each row
626              
627             =item dbrowuniq
628              
629             count/eliminate identical rows (like Unix uniq(1))
630              
631             =item dbfilediff
632              
633             compare fields on rows of a file (something like Unix diff(1))
634              
635             =back
636              
637             =head2 OUTPUT CONTROL
638              
639             =over 4
640              
641             =item dbcolneaten
642              
643             pretty-print columns
644              
645             =item dbfilealter
646              
647             convert between column or list format, or change the column separator
648              
649             =item dbfilestripcomments
650              
651             remove comments from a table
652              
653             =item dbformmail
654              
655             generate a script that sends form mail based on each row
656              
657             =back
658              
659             =head2 CONVERSIONS
660              
661             (These programs convert data into fsdb. See their web pages for details.)
662              
663             =over 4
664              
665             =item cgi_to_db
666              
667             L
668              
669             =item combined_log_format_to_db
670              
671             L
672              
673             =item html_table_to_db
674              
675             HTML tables to fsdb (assuming they're reasonably formatted).
676              
677             =item kitrace_to_db
678              
679             L
680              
681             =item ns_to_db
682              
683             L
684              
685             =item sqlselect_to_db
686              
687             the output of SQL SELECT tables to db
688              
689             =item tabdelim_to_db
690              
691             spreadsheet tab-delimited files to db
692              
693             =item tcpdump_to_db
694              
695             (see man tcpdump(8) on any reasonable system)
696              
697             =item xml_to_db
698              
699             XML input to fsdb, assuming they're very regular
700              
701              
702             =back
703              
704             (And out of fsdb:)
705              
706             =over 4
707              
708             =item db_to_csv
709              
710             Comma-separated-value format from fsdb.
711              
712             =item db_to_html_table
713              
714             simple conversion of Fsdb to html tables
715              
716             =back
717              
718             =head2 STANDARD OPTIONS
719              
720             Many programs have common options:
721              
722             =over 4
723              
724             =item B<-?> or B<--help>
725              
726             Show basic usage.
727              
728             =item B<-N> or B<--new-name>
729              
730             When a command creates a new column like L's C,
731             this option lets one override the default name of that new column.
732              
733             =item B<-T TmpDir>
734              
735             where to put tmp files.
736             Also uses environment variable TMPDIR, if -T is
737             not specified.
738             Default is /tmp.
739              
740             Show basic usage.
741              
742             =item B<-c FRACTION> or B<--confidence FRACTION>
743              
744             Specify confidence interval FRACTION (L, L, etc.)
745              
746             =item B<-C S> or C<--element-separator S>
747              
748             Specify column separator S (L, L).
749              
750             =item B<-d> or B<--debug>
751              
752             Enable debugging (may be repeated for greater effect in some cases).
753              
754             =item B<-a> or B<--include-non-numeric>
755              
756             Compute stats over all data (treating non-numbers as zeros).
757             (By default, things that can't be treated as numbers
758             are ignored for stats purposes)
759              
760             =item B<-S> or B<--pre-sorted>
761              
762             Assume the data is pre-sorted.
763             May be repeated to disable verification (saving a small amount of work).
764              
765             =item B<-e E> or B<--empty E>
766              
767             give value E as the value for empty (null) records
768              
769             =item B<-i I> or B<--input I>
770              
771             Input data from file I.
772              
773             =item B<-o O> or B<--output O>
774              
775             Write data out to file O.
776              
777             =item B<--header> H
778              
779             Use H as the full Fsdb header, rather than reading a header from
780             the input. This option is particularly useful when using Fsdb
781             under Hadoop, where split files don't have headers.
782              
783             =item B<--nolog>.
784              
785             Skip logging the program in a trailing comment.
786              
787             =back
788              
789             When giving Perl code (in L<dbrow> and L<dbroweval>)
790             column names can be embedded if preceded by underscores.
791             (Look at L<dbrow> or L<dbroweval> for examples.)
792              
793             Most programs run in constant memory and use temporary files if necessary.
794             Exceptions are L, L, L,
795             L, L.
796              
797              
798             =head1 ANOTHER EXAMPLE
799              
800             Take the raw data in C<DATA/http_bandwidth>,
801             put a header on it (C<dbcoldefine size bw>),
802             take statistics of each category (C<dbmultistats -k size bw>),
803             pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get:
804              
805             #fsdb size mean stddev pct_rsd
806             1024 1.4962e+06 2.8497e+05 19.047
807             10240 5.0286e+06 6.0103e+05 11.952
808             102400 4.9216e+06 3.0939e+05 6.2863
809             # | dbcoldefine size bw
810             # | /home/johnh/BIN/DB/dbmultistats -k size bw
811             # | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd
812              
813             (The whole command was:
814              
815             cat DATA/http_bandwidth |
816             dbcoldefine size bw |
817             dbmultistats -k size bw |
818             dbcol size mean stddev pct_rsd
819              
820             all on one line.)
821              
822             Then post-process them to get rid of the exponential notation
823             by adding this to the end of the pipeline:
824              
825             dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);'
826              
827             (Actually, this step is no longer required since L
828             now uses a different default format.)
829              
830             giving:
831              
832             #fsdb size mean stddev pct_rsd
833             1024 1496200 284970 19.047
834             10240 5028600 601030 11.952
835             102400 4921600 309390 6.2863
836             # | dbcoldefine size bw
837             # | dbmultistats -k size bw
838             # | dbcol size mean stddev pct_rsd
839             # | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); }
840              
841             In a few lines, raw data is transformed to processed output.
842              
843              
844             Suppose you expect there is an odd distribution of results of one
845             datapoint. Fsdb can easily produce a CDF (cumulative distribution
846             function) of the data, suitable for graphing:
847              
848             cat DB/DATA/http_bandwidth | \
849             dbcoldefine size bw | \
850             dbrow '_size == 102400' | \
851             dbcol bw | \
852             dbsort -n bw | \
853             dbrowenumerate | \
854             dbcolpercentile count | \
855             dbcol bw percentile | \
856             xgraph
857              
858             The steps, roughly:
859             1. get the raw input data and turn it into fsdb format,
860             2. pick out just the relevant column (for efficiency) and sort it,
861             3. for each data point, assign a CDF percentage to it,
862             4. pick out the two columns to graph and show them
863              
864              
865             =head1 A GRADEBOOK EXAMPLE
866              
867             The first commercial program I wrote was a gradebook,
868             so here's how to do it with Fsdb.
869              
870             Format your data like DATA/grades.
871              
872             #fsdb name email id test1
873             a a@ucla.example.edu 1 80
874             b b@usc.example.edu 2 70
875             c c@isi.example.edu 3 65
876             d d@lmu.example.edu 4 90
877             e e@caltech.example.edu 5 70
878             f f@oxy.example.edu 6 90
879              
880             Or if your students have spaces in their names, use C<-F S> and two spaces
881             to separate each column:
882              
883             #fsdb -F S name email id test1
884             alfred aho a@ucla.example.edu 1 80
885             butler lampson b@usc.example.edu 2 70
886             david clark c@isi.example.edu 3 65
887             constantine drovolis d@lmu.example.edu 4 90
888             deborah estrin e@caltech.example.edu 5 70
889             sally floyd f@oxy.example.edu 6 90
890              
891             To compute statistics on an exam, do
892              
893             cat DATA/grades | dbstats test1 |dblistize
894              
895             giving
896              
897             #fsdb -R C ...
898             mean: 77.5
899             stddev: 10.84
900             pct_rsd: 13.987
901             conf_range: 11.377
902             conf_low: 66.123
903             conf_high: 88.877
904             conf_pct: 0.95
905             sum: 465
906             sum_squared: 36625
907             min: 65
908             max: 90
909             n: 6
910             ...
911              
912             To do a histogram:
913              
914             cat DATA/grades | dbcolhisto -n 5 -g test1
915              
916             giving
917              
918             #fsdb low histogram
919             65 *
920             70 **
921             75
922             80 *
923             85
924             90 **
925             # | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1
926              
927             Now you want to send out grades to the students by e-mail.
928             Create a form-letter (in the file F<test1.txt>):
929              
930             To: _email (_name)
931             From: J. Random Professor
932             Subject: test1 scores
933              
934             _name, your score on test1 was _test1.
935             86+ A
936             75-85 B
937             70-74 C
938             0-69 F
939              
940             Generate the shell script that will send the mail out:
941              
942             cat DATA/grades | dbformmail test1.txt > test1.sh
943              
944             And run it:
945              
946             sh <test1.sh
947              
948             The last two steps can be combined:
949              
950             cat DATA/grades | dbformmail test1.txt | sh
951              
952             but I like to keep a copy of exactly what I send.
953              
954              
955             At the end of the semester you'll want to compute grade totals and
956             assign letter grades. Both fall out of dbroweval.
957             For example, to compute weighted total grades with a 40% midterm/60%
958             final where the midterm is 84 possible points and the final 100:
959              
960             dbcol -rv total |
961             dbcolcreate total - |
962             dbroweval '
963             _total = .40 * _midterm/84.0 + .60 * _final/100.0;
964             _total = sprintf("%4.2f", _total);
965             if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' |
966             dbcolneaten
967              
968              
969             If you got the data originally from a spreadsheet, save it in
970             "tab-delimited" format and convert it with tabdelim_to_db
971             (run tabdelim_to_db -? for examples).
972              
973              
974             =head1 A PASSWORD EXAMPLE
975              
976             To convert the Unix password file to db:
977              
978             cat /etc/passwd | sed 's/:/ /g'| \
979             dbcoldefine -F S login password uid gid gecos home shell \
980             >passwd.fsdb
981              
982             To convert the group file
983              
984             cat /etc/group | sed 's/:/ /g' | \
985             dbcoldefine -F S group password gid members \
986             >group.fsdb
987              
988             To show the names of the groups that div7-members are in
989             (assuming DIV7 is in the gecos field):
990              
991             cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \
992             dbjoin -i - -i group.fsdb gid | dbcol login group
993              
994              
995             =head1 SHORT EXAMPLES
996              
997             Which Fsdb programs are the most complicated (based on number of test cases)?
998              
999             ls TEST/*.cmd | \
1000             dbcoldefine test | \
1001             dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \
1002             dbrowuniq -c | \
1003             dbsort -nr count | \
1004             dbcolneaten
1005              
1006             (Answer: L, then L, L and L.)
1007              
1008              
1009             Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)?
1010              
1011             cat $FILE | dbcolstats -q 4 $COLUMN <$FILE | dblistize | dbstripcomments
1012              
1013             cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments
1014              
1015              
1016             Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming
1017             there's a common student id in column "id":
1018              
1019             dbcol id hw1 <hw1.fsdb >t.fsdb
1020              
1021             dbjoin -a -e - grades.fsdb t.fsdb id | \
1022             dbsort name | \
1023             dbcolneaten >new_grades.fsdb
1024              
1025              
1026             Merging two fsdb files with the same rows:
1027              
1028             cat file1.fsdb file2.fsdb >output.fsdb
1029              
1030             or if you want to clean things up a bit
1031              
1032             cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb
1033              
1034             or if you want to know where the data came from
1035              
1036             for i in 1 2
1037             do
1038             dbcolcreate source $i < file$i.fsdb
1039             done >output.fsdb
1040              
1041             (assumes you're using a Bourne-shell compatible shell, not csh).
1042            
1043              
1044             =head1 WARNINGS
1045              
1046             As with any tool, one should (which means I) understand
1047             the limits of the tool.
1048              
1049             All Fsdb tools should run in I<constant memory>.
1050             In some cases (such as F with quartiles, where the whole input
1051             must be re-read), programs will spool data to disk if necessary.
1052              
1053             Most tools buffer one or a few lines of data, so memory
1054             will scale with the size of each line.
1055             (So lines with many columns, or when columns have lots of data,
1056             may cause large memory consumption.)
1057              
1058             All Fsdb tools should run in constant or at worst C time.
1059              
1060             All Fsdb tools use normal Perl math routines for computation.
1061             Although I make every attempt to choose numerically stable algorithms
1062             (although I also welcome feedback and suggestions for improvement),
1063             normal rounding due to computer floating point approximations
1064             can result in inaccuracies when data spans a large range of precision.
1065             (See for example the F test cases.)
1066              
1067             Any requirements and limitations of each Fsdb tool
1068             are documented on its manual page.
1069              
1070             If any Fsdb program violates these assumptions,
1071             that is a bug that should be documented
1072             on the tool's manual page or ideally fixed.
1073              
1074             Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have
1075             some bugs. Fsdb should work on perl from version 5.10 onward.
1076              
1077              
1078             =head1 HISTORY
1079              
1080             There have been three versions of Fsdb;
1081             fsdb 1.0 is a complete re-write of the pre-1995 versions,
1082             and was
1083             distributed from 1995 to 2007.
1084             Fsdb 2.0 is a significant re-write of the 1.x versions
1085             for reasons described below.
1086              
1087             Fsdb (in its various forms) has been used extensively by its author
1088             since 1991. Since 1995 it's been used by two other researchers at
1089             UCLA and several at ISI. In February 1998 it was announced to the
1090             Internet. Since then it has found a few users, some outside where I
1091             work.
1092              
1093             =head2 Fsdb 2.0 Rationale
1094              
1095             I've thought about fsdb-2.0 for many years, but it was started
1096             in earnest in 2007. Fsdb-2.0 has the following goals:
1097              
1098             =over 4
1099              
1100             =item in-one-process processing
1101              
1102             While fsdb is great on the Unix command line as a pipeline between
1103             programs, it should I<also> be possible to set it up to run in a single
1104             process. And if it does so, it should be able to avoid serializing
1105             and deserializing (converting to and from text) data between each module.
1106             (Accomplished in fsdb-2.0: see L, although still needs tuning.)
1107              
1108             =item clean IO API
1109              
1110             Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is
1111             very, very crufty. More than just being ugly (but it was that too),
1112             this made things like reading from one format file and writing to another
1113             the application's job, when it should be the library's.
1114             (Accomplished in fsdb-1.15 and improved in 2.0: see L.)
1115              
1116             =item normalized module APIs
1117              
1118             Because fsdb modules were added as needed over 10 years,
1119             sometimes the module APIs became inconsistent.
1120             (For example, the 1.x C required an empty
1121             value following the name of the new column,
1122             but other programs specify empty values with the C<-e> argument.)
1123             We should smooth over these inconsistencies.
1124             (Accomplished as each module was ported in 2.0 through 2.7.)
1125              
1126             =item everyone handles all input formats
1127              
1128             Given a clean IO API, the distinction between "colized"
1129             and "listized" fsdb files should go away. Any program
1130             should be able to read and write files in any format.
1131             (Accomplished in fsdb-2.1.)
1132              
1133             =back
1134              
1135             Fsdb-2.0 preserves backwards compatibility where possible,
1136             but breaks it where necessary to accomplish the above goals.
1137             In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions.
1138             Benchmarking in 2013 showed that threading performed much worse than
1139             just using pipes, so Fsdb-2.44 uses threading "style",
1140             but implemented with processes (via my "Freds" library).
1141              
1142             =head2 Contributors
1143              
1144             Fsdb includes code ported from Geoff Kuenning (C).
1145              
1146             Fsdb contributors:
1147             Ashvin Goel F,
1148             Geoff Kuenning F,
1149             Vikram Visweswariah F,
1150             Kannan Varadahan F,
1151             Lars Eggert F,
1152             Arkadi Gelfond F,
1153             David Graff F,
1154             Haobo Yu F,
1155             Pavlin Radoslavov F,
1156             Graham Phillips,
1157             Yuri Pradkin,
1158             Alefiya Hussain,
1159             Ya Xu,
1160             Michael Schwendt,
1161             Fabio Silva F,
1162             Jerry Zhao F,
1163             Ning Xu F,
1164             Martin Lukac F,
1165             Xue Cai,
1166             Michael McQuaid,
1167             Christopher Meng,
1168             Calvin Ardi,
1169             H. Merijn Brand,
1170             Lan Wei.
1171              
1172             Fsdb includes datasets contributed from NIST (F),
1173             from
1174             L,
1175             the NIST/SEMATECH e-Handbook of Statistical Methods, section
1176             1.4.2.8.1. Background and Data. The source is public domain, and
1177             reproduced with permission.
1178              
1179              
1180              
1181              
1182             =head1 RELATED WORK
1183              
1184             As stated in the introduction, Fsdb is an incompatible reimplementation
1185             of the ideas found in C</rdb>. By storing data in simple text files and
1186             processing it with pipelines it is easy to experiment (in the shell)
1187             and look at the output. The original implementation of this idea was
1188             /rdb, a commercial product described in the book I<UNIX relational
1189             database management: application development in the UNIX environment>
1190             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web
1191             page L).
1192              
1193             While Fsdb is inspired by Rdb, it includes no code from it,
1194             and Fsdb makes several different design choices.
1195             In particular: rdb attempts to be closer to a "real" database,
1196             with provision for locking, file indexing.
1197             Fsdb focuses on single user use and so eschews these choices.
1198             Rdb also has some support for interactive editing.
1199             Fsdb leaves editing to text editors like emacs or vi.
1200              
1201             In August, 2002 I found out Carlo Strozzi extended RDB with his
1202             package NoSQL L. According to
1203             Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of
1204             RDB. Although I haven't found Perl startup overhead to be a big
1205             problem on my platforms (from old Sparcstation IPCs to 2GHz
1206             Pentium-4s), you may want to evaluate his system.
1207             The Linux Journal has a description of NoSQL
1208             at L.
1209             It seems quite similar to Fsdb.
1210             Like /rdb, NoSQL supports indexing (not present in Fsdb).
1211             Fsdb appears to have richer support for statistics,
1212             and, as of Fsdb-2.x, its support for Perl threading may support
1213             faster performance (one-process, less serialization and deserialization).
1214              
1215              
1216             =head1 RELEASE NOTES
1217              
1218             Versions prior to 1.0 were released informally on my web page
1219             but were not announced.
1220              
1221             =head2 0.0 1991
1222              
1223             started for my own research use
1224              
1225             =head2 0.1 26-May-94
1226              
1227             first check-in to RCS
1228              
1229             =head2 0.2 15-Mar-95
1230              
1231             parts now require perl5
1232              
1233             =head2 1.0, 22-Jul-97
1234              
1235             adds autoconf support and a test script.
1236              
1237             =head2 1.1, 20-Jan-98
1238              
1239             support for double space field separators, better tests
1240              
1241             =head2 1.2, 11-Feb-98
1242              
1243             minor changes and release on comp.lang.perl.announce
1244              
1245             =head2 1.3, 17-Mar-98
1246              
1247             =over 4
1248              
1249             =item *
1250             adds median and quartile options to dbstats
1251              
1252              
1253             =item *
1254              
1255             adds dmalloc_to_db converter
1256              
1257              
1258             =item *
1259              
1260             fixes some warnings
1261              
1262              
1263             =item *
1264              
1265             dbjoin now can run on unsorted input
1266              
1267              
1268             =item *
1269              
1270             fixes a dbjoin bug
1271              
1272              
1273             =item *
1274              
1275             some more tests in the test suite
1276              
1277             =back
1278              
1279             =head2 1.4, 27-Mar-98
1280              
1281             =over 4
1282              
1283             =item *
1284              
1285             improves error messages (all should now report the program that makes the error)
1286              
1287             =item *
1288              
1289             fixed a bug in dbstats output when the mean is zero
1290              
1291             =back
1292              
1293             =head2 1.5, 25-Jun-98
1294              
1295             =over 4
1296              
1297             =item BUG FIX
1298             dbcolhisto, dbcolpercentile now handles non-numeric values like dbstats
1299              
1300             =item NEW
1301             dbcolstats computes zscores and tscores over a column
1302              
1303             =item NEW
1304             dbcolscorrelate computes correlation coefficients between two columns
1305              
1306             =item INTERNAL
1307             ficus_getopt.pl has been replaced by DbGetopt.pm
1308              
1309             =item BUG FIX
1310             all tests are now ``portable'' (previously some tests ran only on my system)
1311              
1312             =item BUG FIX
1313             you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond)
1314              
1315             =item BUG FIX
1316             installation no longer uses cp -f (to work on SunOS 4)
1317              
1318             =back
1319              
1320             =head2 1.6, 24-May-99
1321              
1322             =over 4
1323              
1324             =item NEW
1325             dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary)
1326              
1327             =item NEW
1328             dbcolmovingstats does moving means over a series of data
1329              
1330             =item NEW
1331             dbcol has a -v option to get all columns except those listed
1332              
1333             =item NEW
1334             dbmultistats does quartiles and medians
1335              
1336             =item NEW
1337             dbstripextraheaders now also cleans up bogus comments before the first header
1338              
1339             =item BUG FIX
1340             dbcolneaten works better with double-space-separated data
1341              
1342             =back
1343              
1344             =head2 1.7, 5-Jan-00
1345              
1346             =over 4
1347              
1348             =item NEW
1349             dbcolize now detects and rejects lines that contain embedded copies of the field separator
1350              
1351             =item NEW
1352             configure tries harder to prevent people from improperly configuring/installing fsdb
1353              
1354             =item NEW
1355             tcpdump_to_db converter (incomplete)
1356              
1357             =item NEW
1358             tabdelim_to_db converter: from spreadsheet tab-delimited files to db
1359              
1360             =item NEW
1361             mailing lists for fsdb are C and C
1362              
1363             To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message.
1364              
1365             =item BUG FIX
1366             dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case.
1367              
1368             =item BUG FIX
1369             the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency.
1370              
1371             =item BUG FIX (DOCUMENTATION)
1372             dbcolmovingstats had incorrect sample output.
1373              
1374             =back
1375              
1376             =head2 1.8, 28-Jun-00
1377              
1378             =over 4
1379              
1380             =item BUG FIX
1381             header options are now preserved when writing with dblistize
1382              
1383             =item NEW
1384             dbrowuniq now optionally checks for uniqueness only on certain fields
1385              
1386             =item NEW
1387             dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields
1388              
1389             =item NEW
1390             converter for "crl" format network traces
1391              
1392             =item NEW
1393             anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo.
1394              
1395             =item OPTIMIZATION
1396             comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.)
1397              
1398             =item BUG FIX
1399             dbstats with median/quartiles now correctly handles singleton data points.
1400              
1401             =back
1402              
1403             =head2 1.9, 6-Nov-00
1404              
1405             =over 4
1406              
1407             =item NEW
1408             dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov).
1409              
1410             =item BUG FIX
1411             dbsort now works with perl-5.6
1412              
1413             =back
1414              
1415             =head2 1.10, 10-Apr-01
1416              
1417             =over 4
1418              
1419             =item BUG FIX
1420             dbstats now handles the case where there are more n-tiles than data
1421              
1422             =item NEW
1423             dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu)
1424              
1425             =item BUG FIX
1426             dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu)
1427              
1428             =item BUG FIX
1429             cleanup of temporary files is slightly better
1430              
1431             =back
1432              
1433             =head2 1.11, 2-Nov-01
1434              
1435             =over 4
1436              
1437             =item BUG FIX
1438             dbcolneaten now runs in constant memory
1439              
1440             =item NEW
1441             dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be
1442              
1443             =item OPTIMIZATION
1444             dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001)
1445              
1446             =item INTERNAL
1447             t_distr now ported to perl5 module DbTDistr
1448              
1449             =back
1450              
1451             =head2 1.12, 30-Oct-02
1452              
1453             =over 4
1454              
1455             =item BUG FIX
1456             dbmultistats documentation typo fixed
1457              
1458             =item NEW
1459             dbcolmultiscale
1460              
1461             =item NEW
1462             dbcol has -r option for "relaxed error checking"
1463              
1464             =item NEW
1465             dbcolneaten has new -e option to strip end-of-line spaces
1466              
1467             =item NEW
1468             dbrow finally has a -v option to negate the test
1469              
1470             =item BUG FIX
1471             math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases)
1472              
1473             =item BUG FIX
1474             some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.)
1475              
1476             =item BUG FIX
1477             fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem)
1478              
1479             =item NEW
1480             db_to_html_table: simple conversion to html tables (NO fancy stuff)
1481              
1482             =item NEW
1483             dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML
1484              
1485             =back
1486              
1487              
1488             =head2 1.13, 4-Feb-04
1489              
1490              
1491             =over 4
1492              
1493             =item NEW
1494             fsdb added to the freebsd ports tree L. Maintainer: C
1495              
1496             =item BUG FIX
1497             properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C.
1498              
1499             =item NEW
1500             dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README.
1501              
1502             =item NEW
1503             cgi_to_db converts CGI.pm-format storage to fsdb list format
1504              
1505             =item BUG FIX
1506             handle numeric synonyms for column names in dbcol properly
1507              
1508             =item ENHANCEMENT
1509             "talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert.
1510              
1511             =item CHANGE
1512             dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root)
1513              
1514             =item NEW
1515             on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode
1516              
1517             =item NEW
1518             dbfilevalidate: check a db file for some common errors
1519              
1520             =back
1521              
1522              
1523             =head2 1.14, 24-Aug-06
1524              
1525             =over 4
1526              
1527              
1528             =item ENHANCEMENT
1529             README cleanup
1530              
1531             =item INCOMPATIBLE CHANGE
1532             dbcolsplit renamed dbcolsplittocols
1533              
1534             =item NEW
1535             dbcolsplittorows split one column into multiple rows
1536              
1537             =item NEW
1538             dbcolsregression compute linear regression and correlation for two columns
1539              
1540             =item ENHANCEMENT
1541             csv_to_db: better error handling, normalize field names, skip blank lines
1542              
1543             =item ENHANCEMENT
1544             dbjoin now detects (and fails) if non-joined files have duplicate names
1545              
1546             =item BUG FIX
1547             minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors)
1548              
1549             =back
1550              
1551             =head2 1.15, 12-Nov-07
1552              
1553             =over 4
1554              
1555             =item NEW
1556             fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.)
1557              
1558             =item NEW
1559             Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4.
1560              
1561             =item NEW
1562             dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats.
1563              
1564             =item ENHANCEMENT
1565             fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace.
1566              
1567             =item ENHANCEMENT
1568             dblib.pl is now moved to Fsdb::Old.pm.
1569              
1570             =item BUG FIX
1571             dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!).
1572              
1573             =item ENHANCEMENT
1574             dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.)
1575              
1576             =item ENHANCEMENT
1577             adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way).
1578              
1579             =item BUG FIX
1580             dbmultistats memory usage corrected (O(# tags), not O(1))
1581              
1582             =item ENHANCEMENT
1583             dbmultistats can now optionally run with pre-grouped input in O(1) memory
1584              
1585             =item ENHANCEMENT
1586             dbroweval -N was finally implemented (eat comments)
1587              
1588             =back
1589              
1590             =head2 2.0, 25-Jan-08
1591              
1592             2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete)
1593              
1594             =over 4
1595              
1596             =item ENHANCEMENT:
1597             shifting old programs to Perl modules, with
1598             the front-end program as just a wrapper.
1599             In the short-term, this change just means programs have real man pages.
1600             In the long-run, it will mean that one can run a pipeline in a single
1601             Perl program.
1602             So far:
1603             L,
1604             L,
1605             the new L.
1606             L
1607             the new L,
1608             the old C (renamed L),
1609             L,
1610             L,
1611              
1612             =item NEW:
1613             L is an internal-only module that lets one
1614             use fsdb commands from within perl (via threads).
1615              
1616             It also provides perl function aliases for the internal modules,
1617             so a string of fsdb commands in perl is nearly as terse as in the
1618             shell:
1619              
1620             use Fsdb::Filter::dbpipeline qw(:all);
1621             dbpipeline(
1622             dbrow(qw(name test1)),
1623             dbroweval('_test1 += 5;')
1624             );
1625              
1626             =item INCOMPATIBLE CHANGE:
1627             The old L has been renamed L.
1628             The new L does the same thing as the old L.
1629             This incompatibility is unfortunate but normalizes program names.
1630              
1631             =item CHANGE:
1632             The new L program
1633             always outputs C<-> (the default empty value) for
1634             statistics it cannot compute (for example, standard deviation
1635             if there is only one row),
1636             instead of the old mix of C<-> and "na".
1637              
1638             =item INCOMPATIBLE CHANGE:
1639             The old L program, now called L,
1640             also has different arguments. The C<-t mean,stddev> option is now
1641             C<--tmean mean --tstddev stddev>. See L for details.
1642              
1643             =item INCOMPATIBLE CHANGE:
1644             L now assumes all new columns get the default
1645             value rather than requiring each column to have an initial constant value.
1646             To change the initial value, use the new C<-e> option.
1647              
1648             =item NEW:
1649             L counts rows, an almost-subset of L's C output
1650             (except without differentiating numeric/non-numeric input),
1651             or the equivalent of C.
1652              
1653             =item NEW:
1654             L merges two sorted files.
1655             This functionality was previously embedded in L.
1656              
1657             =item INCOMPATIBLE CHANGE:
1658             L's C<-i> option to include non-matches
1659             is now renamed C<-a>, so as to not conflict with the new
1660             standard option C<-i> for input file.
1661              
1662             =back
1663              
1664             =head2 2.1, 6-Apr-08
1665              
1666             2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format
1667              
1668             =over 4
1669              
1670             =item ENHANCEMENT:
1671             shifting more old programs to Perl modules.
1672             New in 2.1:
1673             L,
1674             L,
1675             L,
1676             L,
1677             L,
1678             L
1679              
1680             =item ENHANCEMENT
1681             L now handles an arbitrary number of input files,
1682             not just exactly two.
1683              
1684             =item NEW
1685             L is an internal routine that handles merging exactly two files.
1686              
1687             =item INCOMPATIBLE CHANGE
1688             L now specifies inputs like L,
1689             rather than assuming the first two arguments were tables (as in fsdb-1).
1690              
1691             The old L argument C<-i> is now C<-a> or C<--type=outer>.
1692              
1693             A minor change: comments in the source files for
1694             L are now intermixed with output
1695             rather than being delayed until the end.
1696              
1697             =item ENHANCEMENT
1698             L now no longer produces warnings when null values are
1699             passed to numeric comparisons.
1700              
1701             =item BUG FIX
1702             L now once again works with code that lacks a trailing semicolon.
1703             (This bug fixes a regression from 1.15.)
1704              
1705             =item INCOMPATIBLE CHANGE
1706             L's old C<-e> option (to avoid end-of-line spaces) is now C<-E>
1707             to avoid conflicts with the standard empty field argument.
1708              
1709             =item INCOMPATIBLE CHANGE
1710             L's old C<-e> option is now C<-E> to avoid conflicts.
1711             And its C<-n>, C<-s>, and C<-w> are now
1712             C<-N>, C<-S>, and C<-W> to correspond.
1713              
1714             =item NEW
1715             L replaces L, L, and L,
1716             but with different options.
1717              
1718             =item ENHANCEMENT
1719             The library routines C now understand both list-format
1720             and column-format data, so all converted programs can now
1721             I read either format. This capability was one
1722             of the milestone goals for 2.0, so yea!
1723              
1724             =back
1725              
1726             =head2 2.2, 23-May-08
1727              
1728             Release 2.2 is another 2.x alpha release. Now I of the
1729             commands are ported, but a few remain, and I plan one last
1730             incompatible change (to the file header) before 2.x final.
1731              
1732             =over 4
1733              
1734             =item ENHANCEMENT
1735              
1736             shifting more old programs to Perl modules.
1737             New in 2.2:
1738             L,
1739             L.
1740             L.
1741             L.
1742             L.
1743             L.
1744             L.
1745             L.
1746             L.
1747             L.
1748             L.
1749             Also
1750             L
1751             exists only as a front-end (command-line) program.
1752              
1753             =item INCOMPATIBLE CHANGE
1754              
1755             The following programs have been dropped from fsdb-2.x:
1756             L,
1757             L,
1758             L,
1759             L.
1760              
1761             =item NEW
1762              
1763             L to convert Apache logfiles
1764              
1765             =item INCOMPATIBLE CHANGE
1766              
1767             Options to L are now B<-B> and B<-I>,
1768             not B<-a> and B<-i>.
1769              
1770             =item INCOMPATIBLE CHANGE
1771              
1772             L is now L.
1773              
1774             =item BUG FIXES
1775              
1776             L better handles empty columns;
1777             L warning suppressed (actually a bug in high-bucket handling).
1778              
1779             =item INCOMPATIBLE CHANGE
1780              
1781             L now requires a C<-k> option in front of the
1782             key (tag) field, or if none is given, it will group by the first field
1783             (both like L).
1784              
1785             =item KNOWN BUG
1786              
1787             L with quantile option doesn't work currently.
1788              
1789             =item INCOMPATIBLE CHANGE
1790              
1791             L is renamed L.
1792              
1793             =item BUG FIXES
1794              
1795             L was leaving its log message as a command, not a comment.
1796             Oops. No longer.
1797              
1798             =back
1799              
1800             =head2 2.3, 27-May-08 (alpha)
1801              
1802             Another alpha release, this one just to fix the critical dbjoin bug
1803             listed below (that happens to have blocked my MP3 jukebox :-).
1804              
1805             =over 4
1806              
1807             =item BUG FIX
1808              
1809             Dbsort no longer hangs if given an input file with no rows.
1810              
1811             =item BUG FIX
1812              
1813             Dbjoin now works with unsorted input coming from a pipeline (like stdin).
1814             Perl-5.8.8 has a bug (?) that was making this case fail---opening
1815             stdin in one thread, reading some, then reading more in a different
1816             thread caused an lseek which works on files, but fails on pipes like stdin.
1817             Go figure.
1818              
1819             =item BUG FIX / KNOWN BUG
1820              
1821             The dbjoin fix also fixed dbmultistats -q
1822             (it now gives the right answer).
1823             Although a new bug appeared, messages like:
1824             Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction.
1825             So the dbmultistats_quartile test is still disabled.
1826              
1827             =back
1828              
1829             =head2 2.4, 18-Jun-08
1830              
1831             Another alpha release, mostly to fix minor usability
1832             problems in dbmapreduce and client functions.
1833              
1834             =over 4
1835              
1836             =item ENHANCEMENT
1837              
1838             L now defaults to running user supplied code without warnings
1839             (as with fsdb-1.x).
1840             Use C<--warnings> or C<-w> to turn them back on.
1841              
1842             =item ENHANCEMENT
1843              
1844             L can now write different format output
1845             than the input, using the C<-m> option.
1846              
1847             =item KNOWN BUG
1848              
1849             L emits warnings on perl 5.10.0
1850             about "Unbalanced string table refcount" and "Scalars leaked"
1851             when run with an external program as a reducer.
1852              
1853             L emits the warning "Attempt to free unreferenced scalar"
1854             when run with quartiles.
1855              
1856             In each case the output is correct.
1857             I believe these can be ignored.
1858              
1859             =item CHANGE
1860              
1861             L no longer logs a line for each reducer that is invoked.
1862              
1863             =back
1864              
1865              
1866             =head2 2.5, 24-Jun-08
1867              
1868             Another alpha release, fixing more minor bugs in
1869             C and lossage in C.
1870              
1871             =over 4
1872              
1873             =item ENHANCEMENT
1874              
1875             L can now tolerate non-map-aware reducers
1876             that pass back the key column in put.
1877             It also passes the current key as the last argument to
1878             external reducers.
1879              
1880             =item BUG FIX
1881              
1882             L, correctly handle C<-header> option again.
1883             (Broken since fsdb-2.3.)
1884              
1885             =back
1886              
1887              
1888             =head2 2.6, 11-Jul-08
1889              
1890             Another alpha release, needed to fix DaGronk.
1891             One new port, small bug fixes, and important fix to L.
1892              
1893             =over 4
1894              
1895             =item ENHANCEMENT
1896              
1897             shifting more old programs to Perl modules.
1898             New in 2.6:
1899             L.
1900              
1901             =item INCOMPATIBLE CHANGE and ENHANCEMENTS
1902             L arguments changed,
1903             use C<--rank> to require ranking instead of C<-r>.
1904             Also, C<--ascending> and C<--descending> can now be specified separately,
1905             both for C<--percentile> and C<--rank>.
1906              
1907             =item BUG FIX
1908              
1909             Sigh, the sense of the --warnings option in L was inverted. No longer.
1910              
1911             =item BUG FIX
1912              
1913             I found and fixed the string leaks (errors like "Unbalanced string
1914             table refcount" and "Scalars leaked") in L and L.
1915             (All Cs in threads must be manually destroyed.)
1916              
1917             =item BUG FIX
1918              
1919             The C<-C> option to specify the column separator in L
1920             now works again (broken since it was ported).
1921              
1922             =back
1923              
1924             =head2 2.7, 30-Jul-08 beta
1925              
1926             The beta release of fsdb-2.x. Finally, all programs are ported.
1927             As statistics, the number of lines of non-library code doubled from
1928             7.5k to 15.5k. The libraries are much more complete,
1929             going from 866 to 5164 lines.
1930             The overall number of programs is about the same,
1931             although 19 were dropped and 11 were added.
1932             The number of test cases has grown from 116 to 175.
1933             All programs are now in perl-5, no more shell scripts or perl-4.
1934             All programs now have manual pages.
1935              
1936             Although this is a major step forward, I still expect
1937             to rename "jdb" to "fsdb".
1938              
1939             =over 4
1940              
1941             =item ENHANCEMENT
1942              
1943             shifting more old programs to Perl modules.
1944             New in 2.7:
1945             L.
1946             L.
1947             L.
1948             L.
1949             L.
1950             L,
1951             L,
1952             L,
1953             L,
1954             L,
1955             L.
1956              
1957             =item INCOMPATIBLE CHANGE
1958              
1959             The following programs have been dropped from fsdb-2.x:
1960             L,
1961             L,
1962             L.
1963             L.
1964             They may come back, but seemed overly specialized.
1965             The following program
1966             L
1967             was dropped because it is superseded by L.
1968             L
1969             was dropped pending test cases and examples.
1970              
1971             =item ENHANCEMENT
1972              
1973             L now has a C<-c> option to correct errors.
1974              
1975             =item NEW
1976              
1977             L provides the inverse of
1978             L.
1979              
1980             =back
1981              
1982              
1983             =head2 2.8, 5-Aug-08
1984              
1985             Change header format, preserving forwards compatibility.
1986              
1987             =over 4
1988              
1989             =item BUG FIX
1990              
1991             Complete editing pass over the manual, making sure it aligns
1992             with fsdb-2.x.
1993              
1994             =item SEMI-COMPATIBLE CHANGE
1995              
1996             The header of fsdb files has changed, it is now #fsdb, not #h (or #L)
1997             and parsing of -F and -R are also different.
1998             See L for the new specification.
1999             The v1 file format will be read, compatibly, but
2000             not written.
2001              
2002             =item BUG FIX
2003              
2004             L now tolerates comments that precede the first key,
2005             instead of failing with an error message.
2006              
2007             =back
2008              
2009              
2010             =head2 2.9, 6-Aug-08
2011              
2012             Still in beta; just a quick bug-fix for L.
2013              
2014             =over 4
2015              
2016             =item ENHANCEMENT
2017              
2018             L now generates plausible output when given no rows
2019             of input.
2020              
2021             =back
2022              
2023             =head2 2.10, 23-Sep-08
2024              
2025             Still in beta, but picking up some bug fixes.
2026              
2027             =over 4
2028              
2029             =item ENHANCEMENT
2030              
2031             L now generates plausible output when given no rows
2032             of input.
2033              
2034             =item ENHANCEMENT
2035              
2036             L the warnings option was backwards;
2037             now corrected. As a result, warnings in user code now default off
2038             (like in fsdb-1.x).
2039              
2040             =item BUG FIX
2041              
2042             L now defaults to assuming the target column is numeric.
2043             The new option C<-N> allows selection of a non-numeric target.
2044              
2045             =item BUG FIX
2046              
2047             L now includes C<--sample> and C<--nosample> options
2048             to compute the sample or full population correlation coefficients.
2049             Thanks to Xue Cai for finding this bug.
2050              
2051             =back
2052              
2053              
2054             =head2 2.11, 14-Oct-08
2055              
2056             Still in beta, but picking up some bug fixes.
2057              
2058             =over 4
2059              
2060             =item ENHANCEMENT
2061              
2062             L is now more aggressive about filling in empty cells
2063             with the official empty value, rather than leaving them blank or as whitespace.
2064              
2065             =item ENHANCEMENT
2066              
2067             L now catches failures during pipeline element setup
2068             and exits reasonably gracefully.
2069              
2070             =item BUG FIX
2071              
2072             L now reaps child processes, thus avoiding
2073             running out of processes when used a lot.
2074              
2075             =back
2076              
2077             =head2 2.12, 16-Oct-08
2078              
2079             Finally, a full (non-beta) 2.x release!
2080              
2081             =over 4
2082              
2083             =item INCOMPATIBLE CHANGE
2084              
2085             Jdb has been renamed Fsdb, the flatfile-streaming database.
2086             This change affects all internal Perl APIs,
2087             but no shell command-level APIs.
2088             While Jdb served well for more than ten years,
2089             it is easily confused with the Java debugger (even though Jdb was there first!).
2090             It also is too generic to work well in web search engines.
2091             Finally, Jdb stands for ``John's database'', and we're a bit beyond that.
2092             (However, some call me the ``file-system guy'', so
2093             one could argue it retains that meaning.)
2094              
2095             If you just used the shell commands, this change should not affect you.
2096             If you used the Perl-level libraries directly in your code,
2097             you should be able to rename "Jdb" to "Fsdb" to move to 2.12.
2098              
2099             The jdb-announce list has not yet been renamed, but it will be shortly.
2100              
2101             With this release I've accomplished everything I wanted to
2102             in fsdb-2.x. I therefore expect to return to boring, bugfix releases.
2103              
2104             =back
2105              
2106             =head2 2.13, 30-Oct-08
2107              
2108             =over 4
2109              
2110             =item BUG FIX
2111              
2112             L now treats non-numeric data as zero by default.
2113              
2114             =item BUG FIX
2115              
2116             Fixed a perl-5.10ism in L that
2117             breaks that program under 5.8.
2118             Thanks to Martin Lukac for reporting the bug.
2119              
2120             =back
2121              
2122             =head2 2.14, 26-Nov-08
2123              
2124             =over 4
2125              
2126             =item BUG FIX
2127              
2128             Improved documentation for L's C<-f> option.
2129              
2130             =item ENHANCEMENT
2131              
2132             L now computes a moving standard deviation in addition
2133             to a moving mean.
2134              
2135             =back
2136              
2137              
2138             =head2 2.15, 13-Apr-09
2139              
2140             =over 4
2141              
2142             =item BUG FIX
2143              
2144             Fix a F bug reported by Shalindra Fernando.
2145              
2146             =back
2147              
2148              
2149             =head2 2.16, 14-Apr-09
2150              
2151             =over 4
2152              
2153             =item BUG FIX
2154              
2155             Another minor release bug: on some systems F loses
2156             executable permissions. Again reported by Shalindra Fernando.
2157              
2158             =back
2159              
2160             =head2 2.17, 25-Jun-09
2161              
2162             =over 4
2163              
2164             =item TYPO FIXES
2165              
2166             Typo in the F manual fixed.
2167              
2168             =item IMPROVEMENT
2169              
2170             There is no longer a comment line to label columns
2171             in F, instead the header line is tweaked to
2172             line up. This change restores the Jdb-1.x behavior, and
2173             means that repeated runs of dbcolneaten no longer add comment lines
2174             each time.
2175              
2176             =item BUG FIX
2177              
2178             It turns out F was not correctly handling trailing spaces
2179             when given the C<-E> option to suppress them. This regression is now
2180             fixed.
2181              
2182             =item EXTENSION
2183              
2184             L can now handle direct references to the last row
2185             via F<$lfref>, a dubious but now documented feature.
2186              
2187             =item BUG FIXES
2188              
2189             Separators set with C<-C> in F and F
2190             were not properly
2191             setting the heading, and null fields were not recognized.
2192             The first bug was reported by Martin Lukac.
2193              
2194             =back
2195              
2196             =head2 2.18, 1-Jul-09 A minor release
2197              
2198             =over 4
2199              
2200             =item IMPROVEMENT
2201              
2202             Documentation for F has been improved.
2203              
2204             =item IMPROVEMENT
2205              
2206             The package should now be PGP-signed.
2207              
2208             =back
2209              
2210              
2211             =head2 2.19, 10-Jul-09
2212              
2213             =over 4
2214              
2215             =item BUG FIX
2216              
2217             Internal improvements to debugging output and robustness of
2218             F and F.
2219             F re-enabled.
2220              
2221             =back
2222              
2223              
2224             =head2 2.20, 30-Nov-09
2225             (A collection of minor bugfixes, plus a build against Fedora 12.)
2226              
2227             =over 4
2228              
2229             =item BUG FIX
2230              
2231             Logging for
2232             F
2233             with code refs is now stable
2234             (it no longer includes a hex pointer to the code reference).
2235              
2236             =item BUG FIX
2237              
2238             Better handling of mixed blank lines in F
2239             (see test case F).
2240              
2241             =item BUG FIX
2242              
2243             F now handles multi-line input better,
2244             and handles tables with COLSPAN.
2245              
2246             =item BUG FIX
2247              
2248             F now cleans up threads in an C
2249             to prevent "cannot detach a joined thread" errors that popped
2250             up in perl-5.10. Hopefully this prevents a race condition
2251             that causes the test suites to hang about 20% of the time
2252             (in F).
2253              
2254             =item IMPROVEMENT
2255              
2256             F now detects and correctly fails
2257             when the input and reducer have incompatible
2258             field separators.
2259              
2260             =item IMPROVEMENT
2261              
2262             F, F, F, F,
2263             and F
2264             now all take an C<-F> option to let one specify the output field separator
2265             (so they work better with F).
2266              
2267             =item BUG FIX
2268              
2269             An omitted C<-k> from the manual page of F
2270             is now there. Bug reported by Unkyu Park.
2271              
2272             =back
2273              
2274              
2275             =head2 2.21, 17-Apr-10
2276             bug fix release
2277              
2278             =over 4
2279              
2280             =item BUG FIX
2281              
2282             F now no longer fails with -outputheader => never
2283             (an obscure bug).
2284              
2285             =item IMPROVEMENT
2286              
2287             F (in the warnings section)
2288             and F now more carefully document how they
2289             handle (and do not handle) numerical precision problems,
2290             and other general limits. Thanks to Yuri Pradkin for prompting
2291             this documentation.
2292              
2293             =item IMPROVEMENT
2294              
2295             C
2296             is now restored from C.
2297              
2298             =item IMPROVEMENT
2299              
2300             Documentation for multiple styles of input approaches
2301             (including performance description) added to L.
2302              
2303             =back
2304              
2305             =head2 2.22, 2010-10-31
2306             One new tool F and several bug fixes for Perl 5.10.
2307              
2308             =over 4
2309              
2310             =item BUG FIX
2311              
2312             F now correctly handles n-way merges.
2313             Bug reported by Yuri Pradkin.
2314              
2315             =item INCOMPATIBLE CHANGE
2316              
2317             F now defaults to I padding the last column.
2318              
2319             =item ADDITION
2320              
2321             F now takes B<-N NewColumn> to give the new
2322             column a name other than "count". Feature requested by Mike Rouch
2323             in January 2005.
2324              
2325             =item ADDITION
2326              
2327             New program F copies the last value of a column
2328             into a new column copylast_column of the next row.
2329             New program requested by Fabio Silva;
2330             useful for converting dbmultistats output into dbrvstatdiff input.
2331              
2332             =item BUG FIX
2333              
2334             Several tools (particularly F and F) would
2335             report errors like "Unbalanced string table refcount: (1) for "STDOUT"
2336             during global destruction" on exit, at least on certain versions
2337             of Perl (for me on 5.10.1), but similar errors have been off-and-on
2338             for several Perl releases. Although I think my code looked
2339             OK, I worked around this problem with a different way of handling
2340             standard IO redirection.
2341              
2342             =back
2343              
2344              
2345             =head2 2.23, 2011-03-10
2346             Several small portability bugfixes; improved F for large datasets
2347              
2348             =over 4
2349              
2350             =item IMPROVEMENT
2351              
2352             Documentation to F was changed to use "sd" to refer to
2353             standard deviation, not "ss" (which might be confused with sum-of-squares).
2354              
2355             =item BUG FIX
2356              
2357             This documentation about F was missing the F<-k> option
2358             in some cases.
2359              
2360             =item BUG FIX
2361              
2362             F was failing on MacOS-10.6.3 for some tests with
2363             the error
2364              
2365             dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl)
2366              
2367             The problem seemed to be only in the error, not in operation.
2368             On MacOS, the error is now suppressed.
2369             Thanks to Alefiya Hussain for providing access to a Mac system
2370             that allowed debugging of this problem.
2371              
2372             =item IMPROVEMENT
2373              
2374             The F command requires an external
2375             Perl library (F). On computers that
2376             lack this optional library, previously Fsdb would configure
2377             with a warning and then test cases would fail.
2378             Now those test cases are skipped with an additional warning.
2379              
2380             =item BUG FIX
2381              
2382             The test suite now supports alternative valid output, as a hack
2383             to account for last-digit floating point differences.
2384             (Not very satisfying :-(
2385              
2386             =item BUG FIX
2387              
2388             F output for confidence intervals on very large
2389             datasets has changed. Previously it failed for more than 2^31-1
2390             records, and handling of T-Distributions with thousands of rows
2391             was a bit dubious. Now datasets with more than 10000 are considered
2392             infinitely large and hopefully correctly handled.
2393              
2394             =back
2395              
2396             =head2 2.24, 2011-04-15
2397             Improvements to fix an old bug in dbmapreduce with different field separators
2398              
2399             =over 4
2400              
2401             =item IMPROVEMENT
2402              
2403             The F command had a C<--correct> option to
2404             work around incompatible field-separators,
2405             but it did nothing. Now it does the correct but sad, data-losing
2406             thing.
2407              
2408             =item IMPROVEMENT
2409              
2410             The F command
2411             previously failed with an error message when invoked
2412             on input with a non-default field separator.
2413             The root cause was the underlying F
2414             that did not handle the case of reducers that generated
2415             output with a different field separator than the input.
2416             We now detect and repair incompatible field separators.
2417             This change corrects a problem originally documented and detected
2418             in Fsdb-2.20.
2419             Bug re-reported by Unkyu Park.
2420              
2421             =back
2422              
2423             =head2 2.25, 2011-08-07
2424             Two new tools, F and F, and a bugfix for two people.
2425              
2426             =over 4
2427              
2428             =item IMPROVEMENT
2429              
2430             F now supports a F<--utc> option,
2431             which also fixes this test case for users outside of the Pacific
2432             time zone. Bug reported by David Graff, and also by Peter Desnoyers
2433             (within a week of each other :-)
2434              
2435             =item NEW
2436              
2437             F can convert simple, very regular XML files into Fsdb.
2438              
2439             =item NEW
2440              
2441             F "pivots" a file, converting multiple rows
2442             corresponding to the same entity into a single row with multiple columns.
2443              
2444             =back
2445              
2446             =head2 2.26, 2011-12-12
2447             Bug fixes, particularly for perl-5.14.2.
2448              
2449             =over 4
2450              
2451             =item BUG FIX
2452              
2453             Bugs fixed in L manual page.
2454              
2455             =item BUG FIX
2456              
2457             Fixed problems where L was truncating floating point numbers
2458             when sorting. This strange behavior happens as of perl-5.14.2 and
2459             it I<seems> like a Perl bug. I've worked around it for the test suites,
2460             but I'm a bit nervous.
2461              
2462             =back
2463              
2464             =head2 2.27, 2012-11-15
2465             Accumulated bug fixes.
2466              
2467             =over 4
2468              
2469             =item IMPROVEMENT
2470              
2471             F now reports errors in CSV input with real diagnostics.
2472              
2473             =item IMPROVEMENT
2474              
2475             F can now compute median, when given the C<-m> option.
2476              
2477             =item BUG FIX
2478              
2479             F non-numeric handling (the C<-a> option) now works properly.
2480              
2481             =item DOCUMENTATION
2482              
2483             The internal
2484             F test framework
2485             is now documented.
2486              
2487             =item BUG FIX
2488              
2489             F now correctly handles the case where there is no input
2490             (previously it output a blank line, which is a malformed fsdb file).
2491             Thanks to Yuri Pradkin for reporting this bug.
2492              
2493             =back
2494              
2495             =head2 2.28, 2012-11-15
2496             A quick release to fix most rpmlint errors.
2497              
2498             =over 4
2499              
2500             =item BUG FIX
2501              
2502             Fixed a number of minor release problems (wrong permissions, old FSF
2503             address, etc.) found by rpmlint.
2504              
2505             =back
2506              
2507             =head2 2.29, 2012-11-20
2508             a quick release for CPAN testing
2509              
2510             =over 4
2511              
2512             =item IMPROVEMENT
2513              
2514             Tweaked the RPM spec.
2515              
2516             =item IMPROVEMENT
2517              
2518             Modified F to fail gracefully on Perl installations
2519             that lack threads. (Without this fix, I get massive failures
2520             in the non-ithreads test system.)
2521              
2522             =back
2523              
2524             =head2 2.30, 2012-11-25
2525             improvements to perl portability
2526              
2527             =over 4
2528              
2529             =item BUG FIX
2530              
2531             Removed unicode character in documentation of F
2532             so pod tests will pass. (Sigh, that should work :-( )
2533              
2534             =item BUG FIX
2535              
2536             Fixed test suite failures on 5 tests (F
2537             was the first) due to L's addition of a period.
2538             This problem was breaking Fsdb on perl-5.17.
2539             Thanks to Michael McQuaid for helping diagnose this problem.
2540              
2541             =item IMPROVEMENT
2542              
2543             The test suite now prints out the names of tests it tries.
2544              
2545             =back
2546              
2547             =head2 2.31, 2012-11-28
2548             A release with actual improvements to dbfilepivot and dbrowuniq.
2549              
2550             =over 4
2551              
2552             =item BUG FIX
2553              
2554             Documentation fixes: typos in L,
2555             bugs in L,
2556             clarification for comment handling in L.
2557              
2558             =item IMPROVEMENT
2559              
2560             Previously L assumed the input was grouped by keys
2561             and didn't verify that pre-condition.
2562             Now there is no pre-condition (it will sort the input by default),
2563             and it checks if the invariant is violated.
2564              
2565             =item BUG FIX
2566              
2567             Previously L failed if the input had comments (oops :-);
2568             no longer.
2569              
2570             =item IMPROVEMENT
2571              
2572             Now L has the C<-L> option to preserve the last
2573             unique row (instead of the first), a common idiom.
2574              
2575             =back
2576              
2577             =head2 2.32, 2012-12-21
2578             Test suites should now be more numerically robust.
2579              
2580             =over 4
2581              
2582             =item NEW
2583              
2584             New L does fsdb-aware file differencing.
2585             It does not do smart intuition of add/removes like Unix diff(1),
2586             but it does know about columns, and with C<-E>, it does
2587             numeric-aware differences.
2588              
2589             =item IMPROVEMENT
2590              
2591             Test suites that are numeric now use L to do numeric-aware
2592             comparisons, so the test suite should now be robust to slightly different
2593             computers and operating systems and compilers than what I use.
2594              
2595             =back
2596              
2597             =head2 2.33, 2012-12-23
2598             Minor fixes to some test cases.
2599              
2600             =over 4
2601              
2602             =item IMPROVEMENT
2603              
2604             L and L
2605             now support the C<-N> option to give the new column a
2606             different name. (And test cases where this duplication mattered
2607             have been fixed.)
2608              
2609             =item IMPROVEMENT
2610              
2611             L now shows the t-test breakpoint with a reasonable number of
2612             floating point digits.
2613              
2614             =item BUG FIX
2615              
2616             Fixed a numerical stability problem in the F test case.
2617              
2618             =back
2619              
2620             =head1 WHAT'S NEW
2621              
2622             =head2 2.34, 2013-02-10
2623             Parallelism in L.
2624              
2625             =over 4
2626              
2627             =item IMPROVEMENT
2628              
2629             Documentation for L now includes resource requirements.
2630              
2631             =item IMPROVEMENT
2632              
2633             Default memory usage for L is now about 256MB.
2634             (The world keeps moving forward.)
2635              
2636             =item IMPROVEMENT
2637              
2638             L now does merging in parallel.
2639             As a side-effect, L should be faster when
2640             input overflows memory. The level of parallelism
2641             can be limited with the C<--parallelism> option.
2642             (There is more work to do here, but we're off to a start.)
2643              
2644             =back
2645              
2646             =head2 2.35, 2013-02-23
2647             Improvements to dbmerge parallelism
2648              
2649             =over 4
2650              
2651             =item BUG FIX
2652              
2653             Fsdb temporary files are now created more securely (with File::Temp).
2654              
2655             =item IMPROVEMENT
2656              
2657             Programs that sort or merge on fields (L, L, L,
2658             L) now report an error if no fields on which to join or merge
2659             are given.
2660              
2661             =item IMPROVEMENT
2662              
2663             Parallelism in L should now be more consistent,
2664             with less starting and stopping.
2665              
2666             =item IMPROVEMENT
2667             In L, the C<--xargs> option lets one give input filenames on
2668             standard input, rather than the command line.
2669             This feature paves the way for faster dbsort for large inputs
2670             (by pipelining sorting and merging), expected in the next release.
2671              
2672             =back
2673              
2674              
2675             =head2 2.36, 2013-02-25
2676             dbsort pipelines with dbmerge
2677              
2678             =over 4
2679              
2680             =item IMPROVEMENT
2681             For large inputs,
2682             L now pipelines sorting and merging,
2683             allowing earlier processing.
2684              
2685             =item BUG FIX
2686             Since 2.35, L delayed cleanup of intermediate files,
2687             thereby requiring extra disk space.
2688              
2689             =back
2690              
2691             =head2 2.37, 2013-02-26
2692             quick bugfix to support parallel sort and merge from recent releases
2693              
2694             =over 4
2695              
2696             =item BUG FIX
2697             Since 2.35, L delayed removal of input files given by
2698             C<--xargs>. This problem is now fixed.
2699              
2700             =back
2701              
2702              
2703             =head2 2.38, 2013-04-29
2704             minor bug fixes
2705              
2706             =over 4
2707              
2708             =item CLARIFICATION
2709              
2710             Configure now rejects Windows since tests seem to hang
2711             on some versions of Windows.
2712             (I would love help from a Windows developer to get this problem fixed,
2713             but I cannot do it.) See F.
2714              
2715             =item IMPROVEMENT
2716              
2717             All programs that use temporary files
2718             (L, L, L, L)
2719             now take the C<-T> option
2720             and set the temporary directory consistently.
2721              
2722             In addition, error messages are better when the temporary directory
2723             has problems. Problem reported by Liang Zhu.
2724              
2725             =item BUG FIX
2726              
2727             L was failing with external, map-reduce aware reducers
2728             (when invoked with -M and an external program).
2729             (Sigh, did this case ever work?)
2730             This case should now work.
2731             Thanks to Yuri Pradkin for reporting this bug (in 2011).
2732              
2733             =item BUG FIX
2734              
2735             Fixed perl-5.10 problem with L.
2736             Thanks to Yuri Pradkin for reporting this bug (in 2013).
2737              
2738             =back
2739              
2740             =head2 2.39, 2013-05-31
2741             quick release for the dbrowuniq extension
2742              
2743             =over 4
2744              
2745             =item BUG FIX
2746              
2747             Actually in 2.38, the Fedora F<.spec> got cleaner dependencies.
2748             Suggestion from Christopher Meng via L.
2749              
2750             =item ENHANCEMENT
2751              
2752             Fsdb files are now explicitly set into UTF-8 encoding,
2753             unless one specifies C<-encoding> to C.
2754              
2755             =item ENHANCEMENT
2756              
2757             L now supports C<-I> for incremental counting.
2758              
2759             =back
2760              
2761             =head2 2.40, 2013-07-13
2762             small bug fixes
2763              
2764             =over 4
2765              
2766             =item BUG FIX
2767              
2768             L now has more respect for a user-given temporary directory;
2769             it no longer is ignored for merging.
2770              
2771             =item IMPROVEMENT
2772              
2773             L now has options to output the first, last, and both first
2774             and last rows of a run (C<-F>, C<-L>, and C<-B>).
2775              
2776             =item BUG FIX
2777              
2778             L now correctly handles C<-N>. Sigh, it didn't work before.
2779              
2780             =back
2781              
2782             =head2 2.41, 2013-07-29
2783             small bug and packaging fixes
2784              
2785             =over 4
2786              
2787             =item ENHANCEMENT
2788              
2789             Documentation to L improved
2790             (inspired by questions from Qian Kun).
2791              
2792             =item BUG FIX
2793              
2794             L no longer duplicates
2795             singleton unique lines when outputting both (with C<-B>).
2796              
2797             =item BUG FIX
2798              
2799             Add missing C dependency to F.
2800              
2801             =item ENHANCEMENT
2802              
2803             Tests now show the diff of the failing output
2804             if run with C.
2805              
2806             =item ENHANCEMENT
2807              
2808             L now includes documentation for how to output extra rows.
2809             Suggestion from Yuri Pradkin.
2810              
2811             =item BUG FIX
2812              
2813             Several improvements to the Fedora package
2814             from Michael Schwendt
2815             via L,
2816             and from the harsh master that is F.
2817             (I am stymied at teaching it that "outliers" is spelled correctly.
2818             Maybe I should send it Schneier's book. And an unresolvable
2819             invalid-spec-name lurks in the SRPM.)
2820              
2821             =back
2822              
2823             =head2 2.42, 2013-07-31
2824             A bug fix and packaging release.
2825              
2826             =over 4
2827              
2828             =item ENHANCEMENT
2829              
2830             Documentation to L improved
2831             to better describe memory usage.
2832             (Based on problem report by Lin Quan.)
2833              
2834             =item BUG FIX
2835              
2836             The F<.spec> is now F
2837             to satisfy F.
2838             Thanks to Christopher Meng for a specific bug report.
2839              
2840             =item BUG FIX
2841              
2842             Test F no longer has a column
2843             that caused failures because of numerical instability.
2844              
2845             =item BUG FIX
2846              
2847             Some tests now better handle bugs in old versions of perl (5.10, 5.12).
2848             Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12,
2849             but the fix should affect other platforms.
2850              
2851             =back
2852              
2853             =head2 2.43, 2013-08-27
2854             Adds in-file compression.
2855              
2856             =over 4
2857              
2858             =item BUG FIX
2859              
2860             Changed the sort on F to strings
2861             (from numerics) so we're less susceptible to false test-failures
2862             due to floating point IO differences.
2863              
2864             =item EXPERIMENTAL ENHANCEMENT
2865              
2866             Yet more parallelism in L:
2867             new "endgame-mode" builds a merge tree of processes at the end
2868             of large merge tasks to get maximal parallelism.
2869             Currently this feature is off by default
2870             because it can hang for some inputs.
2871             Enable this experimental feature with C<--endgame>.
2872              
2873             =item ENHANCEMENT
2874              
2875             C now handles being given C objects
2876             (as exercised by L).
2877              
2878             =item BUG FIX
2879              
2880             Handling of NamedTmpfiles now supports concurrency.
2881             This fix will hopefully fix occasional
2882             "Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93."
2883             errors.
2884              
2885             =item BUG FIX
2886              
2887             Fsdb now requires perl 5.10.
2888             This is a bug fix because some test cases used to require it,
2889             but this fact was not properly documented.
2890             (Back-porting to 5.008 would require removing all C operators.)
2891              
2892             =item ENHANCEMENT
2893              
2894             Fsdb now handles automatic compression of file contents.
2895             Enable compression with C
2896             (or C or C).
2897             All programs should operate on compressed files
2898             and leave the output with the same level of compression.
2899             C is recommended as fastest and most efficient.
2900             C produces unrepeatable output (and so has no
2901             output test), it seems to insist on adding a timestamp.
2902              
2903             =back
2904              
2905             =head2 2.44, 2013-10-02
2906             A major change--all threads are gone.
2907              
2908             =over 4
2909              
2910             =item ENHANCEMENT
2911              
2912             Fsdb is now thread free and only uses processes for parallelism.
2913             This change is a big change--the entire motivation for Fsdb-2
2914             was to exploit parallelism via threading.
2915             Parallelism--good, but perl threading--bad for performance.
2916             Horribly bad for performance.
2917             About 20x worse than pipes on my box.
2918             (See perl bug #119445 for the discussion.)
2919              
2920             =item NEW
2921              
2922             C provides a thread-like abstraction over forking,
2923             with some nice support for callbacks in the parent upon child termination.
2924              
2925             =item ENHANCEMENT
2926              
2927             Details about removing threads:
2928             C is thread free,
2929             and new tests to verify each of its parts.
2930             The easy cases are C,
2931             C, C, C, and
2932             C, each of which use it in simple ways (2013-09-09).
2933             C is now thread free (2013-09-13),
2934             but was a significant rewrite,
2935             which brought C along.
2936             C is partly thread free (2013-09-21),
2937             again as a rewrite,
2938             and it brings C along.
2939             Full C support took much longer (2013-10-02).
2940              
2941             =item BUG FIX
2942              
2943             When running with user-only output (C<-n>),
2944             L now resets the output vector C<$ofref>
2945             after it has been output.
2946              
2947             =item NEW
2948              
2949             L will create all columns at the head of each row
2950             with the C<--first> option.
2951              
2952             =item NEW
2953              
2954             L will concatenate two files,
2955             verifying that they have the same schema.
2956              
2957             =item ENHANCEMENT
2958              
2959             L now passes comments through,
2960             rather than eating them as before.
2961              
2962             Also, L now supports a C<--> option to prevent misinterpreting
2963             sub-program parameters as for dbmapreduce.
2964              
2965             =item INCOMPATIBLE CHANGE
2966              
2967             L no longer figures out if it needs to add the key
2968             to the output. For multi-key-aware reducers, it never does
2969             (and cannot). For non-multi-key-aware reducers,
2970             it defaults to add the key and will now fail if the reducer adds the key
2971             (with error "dbcolcreate: attempt to create pre-existing column...").
2972             In such cases, one must disable adding the key with the new
2973             option C<--no-prepend-key>.
2974              
2975             =item INCOMPATIBLE CHANGE
2976              
2977             L no longer copies the input field separator by default.
2978             For multi-key-aware reducers, it never does
2979             (and cannot). For non-multi-key-aware reducers,
2980             it defaults to I copying the field separator,
2981             but it will copy it (the old default) with the C<--copy-fs> option.
2982              
2983             =back
2984              
2985             =head2 2.45, 2013-10-07
2986             cleanup from de-thread-ification
2987              
2988             =over 4
2989              
2990             =item BUG FIX
2991              
2992             Corrected a fast busy-wait in L.
2993              
2994             =item ENHANCEMENT
2995              
2996             Endgame mode enabled in L; it (and also large cases of L)
2997             should now exploit greater parallelism.
2998              
2999             =item BUG FIX
3000              
3001             Test case with C (gone since 2.44) now removed.
3002              
3003             =back
3004              
3005             =head2 2.46, 2013-10-08
3006             continuing cleanup of our no-threads version
3007              
3008             =over 4
3009              
3010             =item BUG FIX
3011              
3012             Fixed some packaging details.
3013             (Really, threads are no longer required,
3014             missing tests in the MANIFEST.)
3015              
3016             =item IMPROVEMENT
3017              
3018             L now better communicates with the merge process to avoid
3019             bursty parallelism.
3020              
3021             L now can take C<< -autoflush => 1 >>
3022             for line-buffered IO.
3023              
3024             =back
3025              
3026             =head2 2.47, 2013-10-12
3027             test suite cleanup for non-threaded perls
3028              
3029             =over 4
3030              
3031             =item BUG FIX
3032              
3033             Removed some stray "use threads" in some test cases.
3034             We didn't need them, and these were breaking non-threaded perls.
3035              
3036             =item BUG FIX
3037              
3038             Better handling of Fred cleanup;
3039             should fix intermittent L failures on BSD.
3040              
3041             =item ENHANCEMENT
3042              
3043             Improved test framework to show output when tests fail.
3044             (This time, for real.)
3045              
3046             =back
3047              
3048             =head2 2.48, 2014-01-03
3049             small bugfixes and improved release engineering
3050              
3051             =over 4
3052              
3053             =item ENHANCEMENT
3054              
3055             Test suites now skip tests for libraries that are missing.
3056             (Patch for missing C contributed by Calvin Ardi.)
3057              
3058             =item ENHANCEMENT
3059              
3060             Removed references to Jdb in the package specification.
3061             Since the name was changed in 2008, there's no longer a huge
3062             need for backwards compatibility.
3063             (Suggestion from Petr Šabata.)
3064              
3065             =item ENHANCEMENT
3066              
3067             Test suites now invoke the perl using the path from C<$Config{perlpath}>.
3068             Hopefully this helps testing in environments where there are multiple installed
3069             perls and the default perl is not the same as the perl-under-test
3070             (as happens in cpantesters.org).
3071              
3072             =item BUG FIX
3073              
3074             Added specific encoding to this manpage to account for
3075             Unicode. Required to build correctly against perl-5.18.
3076              
3077             =back
3078              
3079             =head2 2.49, 2014-01-04
3080             bugfix to unicode handling in Fsdb IO (plus minor packaging fixes)
3081              
3082             =over 4
3083              
3084             =item BUG FIX
3085              
3086             Restored a line in the F<.spec> to chmod g-s.
3087              
3088             =item BUG FIX
3089              
3090             Unicode decoding is now handled correctly for programs that read
3091             from standard input.
3092             (Also: New test scripts cover unicode input and output.)
3093              
3094             =item BUG FIX
3095              
3096             Fix to L documentation encoding line.
3097             Addresses test failure in perl-5.16 and earlier.
3098             (Who knew "encoding" had to be followed by a blank line.)
3099              
3100             =back
3101              
3102             =head1 WHAT'S NEW
3103              
3104             =head2 2.50, 2014-05-27
3105             a quick release for spec tweaks
3106              
3107             =over 4
3108              
3109             =item ENHANCEMENT
3110              
3111             In L, the C<-N> (no output, even comments) option now
3112             implies C<-n>, and it now suppresses the header and trailer.
3113              
3114             =item BUG FIX
3115              
3116             A few more tweaks to the F from Petr Šabata.
3117              
3118             =item BUG FIX
3119              
3120             Fixed 3 uses of C in test suites that were causing test
3121             failures (due to warnings, not real failures) on some platforms.
3122              
3123             =back
3124              
3125             =head2 2.51, 2014-09-05
3126             Feature enhancements to L, L, L, and new L
3127              
3128             =over 4
3129              
3130             =item ENHANCEMENT
3131              
3132             L now has a C<--no-recreate-fatal>
3133             that causes it to ignore creation of existing columns
3134             (instead of failing).
3135              
3136             =item ENHANCEMENT
3137              
3138             L once again is robust to reducers
3139             that output the key;
3140             C<--no-prepend-key> is no longer mandatory.
3141              
3142             =item ENHANCEMENT
3143              
3144             L can now enumerate the output rows with C<-E>.
3145              
3146             =item BUG FIX
3147              
3148             L is more mathematically robust.
3149             Previously for some inputs and some platforms,
3150             floating point rounding could
3151             sometimes cause square roots of negative numbers.
3152              
3153             =item NEW
3154              
3155             L converts the output of the MySQL or MariaDB
3156             select command into fsdb format.
3157              
3158             =item INCOMPATIBLE CHANGE
3159              
3160             L now outputs the I row
3161             when doing sloppy numeric comparisons,
3162             to better support test suites.
3163              
3164             =back
3165              
3166             =head2 2.52, 2014-11-03
3167             Fixing the test suite for line number changes.
3168              
3169             =over 4
3170              
3171             =item ENHANCEMENT
3172              
3173             Test suites changed to be robust to exact line numbers of failures,
3174             since different Perl releases fail on different lines.
3175             L
3176              
3177             =back
3178              
3179              
3180             =head2 2.53, 2014-11-26
3181             bug fixes and stability improvements to dbmapreduce
3182              
3183             =over 4
3184              
3185             =item ENHANCEMENT
3186              
3187             The L now supports a C<--quiet> option.
3188              
3189             =item ENHANCEMENT
3190              
3191             Better documentation of L.
3192              
3193             =item BUGFIX
3194              
3195             Added groff-base and perl-podlators to the Fedora package spec.
3196             Fixes L.
3197             (Also in package 2.52-2.)
3198              
3199             =item BUGFIX
3200              
3201             An important stability improvement to L.
3202             It, plus L, and L now support
3203             controlled parallelism with the C<--parallelism=N> option.
3204             They default to run with the number of available CPUs.
3205             L also moderates its level of parallelism.
3206             Previously it would create reducers as needed,
3207             causing CPU thrashing if reducers ran much slower than data production.
3208              
3209             =item BUGFIX
3210              
3211             The combination of L with L now works
3212             as it should. (The obscure bug was an interaction with L
3213             with non-multi-key reducers that output their own key. L
3214             has too many useful corner cases.)
3215              
3216             =back
3217              
3218             =head2 2.54, 2014-11-28
3219             fix for the test suite to correct failing tests on not-my-platform
3220              
3221             =over 4
3222              
3223             =item BUGFIX
3224              
3225             Sigh, the test suite now has a test suite.
3226             Because, yes, I broke it, causing many incorrect failures
3227             at cpantesters.
3228             Now fixed.
3229              
3230             =back
3231              
3232             =head2 2.55, 2015-01-05
3233             many spelling fixes and L tests are more robust to different numeric precision
3234              
3235             =over 4
3236              
3237             =item ENHANCEMENT
3238              
3239             L now can be extra quiet, as I continue to try to track down
3240             a numeric difference on FreeBSD AMD boxes.
3241              
3242             =item ENHANCEMENT
3243              
3244             L gave different test output
3245             (just reflecting rounding error)
3246             when stddev approaches zero. We now detect and handle this case.
3247             See
3248             and thanks to H. Merijn Brand for the bug report.
3249              
3250             =item BUG FIX
3251              
3252             Many, many spelling bugs found by
3253             H. Merijn Brand; thanks for the bug report.
3254              
3255             =item INCOMPATIBLE CHANGE
3256              
3257             A number of programs had misspelled "separator"
3258             in C<--fieldseparator> and C<--columnseparator> options as "seperator".
3259             These are now correctly spelled.
3260              
3261             =back
3262              
3263             =head2 2.56, 2015-02-03
3264             fix against Getopt::Long-2.43's stricter error checking
3265              
3266             =over 4
3267              
3268             =item BUG FIX
3269              
3270             Internal argument parsing uses Getopt::Long, but mixed pass-through and E<lt>E<gt>.
3271             Bug reported by Petr Pisar at L.
3272              
3273             =item BUG FIX
3274              
3275             Added missing BuildRequires for C.
3276              
3277             =back
3278              
3279             =head2 2.57, 2015-04-29
3280             Minor changes, with better performance from L.
3281              
3282             =over 4
3283              
3284             =item BUG FIX
3285              
3286             L now honors C<--remove-inputs> (previously it didn't).
3287             This omission meant that L (and L) would accumulate
3288             files in F when running. Bad news for inputs with 4M keys.
3289              
3290             =item ENHANCEMENT
3291              
3292             L should be faster with lots of small keys.
3293             L now supports C<-k> to get some of the functionality of
3294             L (if data is pre-sorted and median/quartiles are not required).
3295              
3296             L now honors C<--remove-inputs> (previously it didn't).
3297             This omission meant that L (and L) would accumulate
3298             files in F when running. Bad news for inputs with 4M keys.
3299              
3300             =back
3301              
3302              
3303             =head2 2.58, 2015-04-30
3304             Bugfix in L
3305              
3306             =over 4
3307              
3308             =item BUG FIX
3309              
3310             Fixed a case where L suffered mojibake in endgame mode.
3311             This bug surfaced when L was applied to large files
3312             (big enough to require merging) with unicode in them;
3313             the symptom was something like:
3314             Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111.
3315              
3316             =back
3317              
3318              
3319             =head2 2.59, 2016-09-01
3320             Collect a few small bug fixes and documentation improvements.
3321              
3322             =over 4
3323              
3324             =item BUG FIX
3325              
3326             More IO is explicitly marked UTF-8 to avoid Perl's tendency to
3327             mojibake on otherwise valid unicode input.
3328             This change helps L.
3329              
3330             =item ENHANCEMENT
3331              
3332             L now crossreferences L.
3333              
3334             =item ENHANCEMENT
3335              
3336             Documentation for L now clarifies that the default is baseline mode.
3337              
3338             =item BUG FIX
3339              
3340             L now propagates C<-T> into the sorting process (if it is required).
3341             Thanks to Lan Wei for reporting this bug.
3342              
3343             =back
3344              
3345              
3346             =head2 2.60, 2016-09-04
3347             Adds support for hash joins.
3348              
3349             =over 4
3350              
3351             =item ENHANCEMENT
3352              
3353             L now supports hash joins
3354             with C<-t lefthash> and C<-t righthash>.
3355             Hash joins cache a table in memory, but do not require
3356             that the other table be sorted.
3357             They are ideal when joining a large table against a small one.
3358              
3359             =back
3360              
3361             =head2 2.61, 2016-09-05
3362             Support left and right outer joins.
3363              
3364             =over 4
3365              
3366             =item ENHANCEMENT
3367              
3368             L now handles left and right outer joins
3369             with C<-t left> and C<-t right>.
3370              
3371             =item ENHANCEMENT
3372              
3373             L hash joins are now selected
3374             with C<-m lefthash> and C<-m righthash>
3375             (not the short-lived C<-t righthash> option).
3376             (Technically this change is incompatible with Fsdb-2.60, but
3377             no one but me ever used that version.)
3378              
3379             =back
3380              
3381             =head2 2.62, 2016-11-29
3382             A new L and other minor improvements.
3383              
3384             =over 4
3385              
3386             =item ENHANCEMENT
3387              
3388             Documentation for L now includes sample output.
3389              
3390             =item NEW
3391              
3392             L converts a specific form of YAML to fsdb.
3393              
3394             =item BUG FIX
3395              
3396             The test suite now uses C rather than C
3397             to make OpenBSD-5.9 happier, I hope.
3398              
3399             =item ENHANCEMENT
3400              
3401             Comments that log operations at the end of each file now do simple
3402             quoting of spaces. (It is not guaranteed to be fully shell-compliant.)
3403              
3404             =item ENHANCEMENT
3405              
3406             There is a new standard option, C<--header>,
3407             allowing one to specify an Fsdb header for inputs that lack it.
3408             Currently it is supported by L,
3409             L, L, L, L,
3410             L.
3411              
3412             =item ENHANCEMENT
3413              
3414             L now allows the B<--possible-pivots> option,
3415             and if it is provided processes the data in one pass.
3416              
3417             =item ENHANCEMENT
3418              
3419             L logs are now quoted.
3420              
3421             =back
3422              
3423             =head1 AUTHOR
3424              
3425             John Heidemann, C
3426              
3427             See L for the many people who have contributed
3428             bug reports and fixes.
3429              
3430              
3431             =head1 COPYRIGHT
3432              
3433             Fsdb is Copyright (C) 1991-2016 by John Heidemann .
3434              
3435             This program is free software; you can redistribute it and/or modify
3436             it under the terms of version 2 of the GNU General Public License as
3437             published by the Free Software Foundation.
3438              
3439             This program is distributed in the hope that it will be useful, but
3440             WITHOUT ANY WARRANTY; without even the implied warranty of
3441             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3442             General Public License for more details.
3443              
3444             You should have received a copy of the GNU General Public License
3445             along with this program; if not, write to the Free Software
3446             Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3447              
3448             A copy of the GNU General Public License can be found in the file
3449             ``COPYING''.
3450              
3451              
3452              
3453             =head1 COMMENTS and BUG REPORTS
3454              
3455             Any comments about these programs should be sent to John Heidemann
3456             C.
3457              
3458              
3459             =cut
3460              
3461             1; # End of Fsdb
3462              
3463             # LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg
3464             # LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un
3465             # LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw
3466             # LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize
3467             # LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct
3468             # LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR
3469             # LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss
3470             # LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff
3471             # LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb
3472             # LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley
3473             # LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl
3474             # LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar
3475             # LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed
3476             # LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah
3477             # LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt
3478             # LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit
3479             # LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh
3480             # LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre
3481             # LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr
3482             # LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz
3483             # LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO
3484             # LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen
3485             # LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw
3486             # LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount
3487             # LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho
3488             # LocalWords: alfred david clark constantine debrorah Fsdb's colized listized
3489             # LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa
3490             # LocalWords: refcount lossage DaGronk dbcolscorellate ipchain