| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Fsdb.pm |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Copyright (C) 1991-2016 by John Heidemann |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
|
9
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License, |
|
10
|
|
|
|
|
|
|
# version 2, as published by the Free Software Foundation. |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
|
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15
|
|
|
|
|
|
|
# GNU General Public License for more details. |
|
16
|
|
|
|
|
|
|
# |
|
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License along |
|
18
|
|
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc., |
|
19
|
|
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
|
20
|
|
|
|
|
|
|
# |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Fsdb; |
|
23
|
|
|
|
|
|
|
|
|
24
|
2
|
|
|
2
|
|
214032
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
80
|
|
|
25
|
2
|
|
|
2
|
|
10
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
41
|
|
|
26
|
2
|
|
|
2
|
|
1343
|
use utf8; |
|
|
2
|
|
|
|
|
25
|
|
|
|
2
|
|
|
|
|
9
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=encoding utf8 |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Fsdb - a flat-text database for shell scripting |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=cut |
|
36
|
|
|
|
|
|
|
our $VERSION = '2.62'; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Fsdb, the flatfile streaming database, is a package of commands |
|
41
|
|
|
|
|
|
|
for manipulating flat-ASCII databases from |
|
42
|
|
|
|
|
|
|
shell scripts. Fsdb is useful to process medium amounts of data (with |
|
43
|
|
|
|
|
|
|
very little data you'd do it by hand, with megabytes you might want a |
|
44
|
|
|
|
|
|
|
real database). |
|
45
|
|
|
|
|
|
|
Fsdb was known as Jdb from 1991 to Oct. 2008. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Fsdb is very good at doing things like: |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 4 |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item * |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
extracting measurements from experimental output |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item * |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
examining data to address different hypotheses |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
joining data from different experiments |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
eliminating/detecting outliers |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
computing statistics on data |
|
70
|
|
|
|
|
|
|
(mean, confidence intervals, correlations, histograms) |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item * |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
reformatting data for graphing programs |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=back |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Fsdb is built around the idea of a flat text file as a database. |
|
79
|
|
|
|
|
|
|
Fsdb files (by convention, with the extension F<.fsdb>), |
|
80
|
|
|
|
|
|
|
have a header documenting the schema (what the columns mean), |
|
81
|
|
|
|
|
|
|
and then each line represents a database record (or row). |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
For example: |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#fsdb experiment duration |
|
86
|
|
|
|
|
|
|
ufs_mab_sys 37.2 |
|
87
|
|
|
|
|
|
|
ufs_mab_sys 37.3 |
|
88
|
|
|
|
|
|
|
ufs_rcp_real 264.5 |
|
89
|
|
|
|
|
|
|
ufs_rcp_real 277.9 |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Is a simple file with four experiments (the rows), |
|
92
|
|
|
|
|
|
|
each with a description and run time |
|
93
|
|
|
|
|
|
|
in the first and second columns. |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Rather than hand-code scripts to do each special case, Fsdb provides |
|
96
|
|
|
|
|
|
|
higher-level functions. Although it's often easy to throw together a |
|
97
|
|
|
|
|
|
|
custom script to do any single task, I believe that there are several |
|
98
|
|
|
|
|
|
|
advantages to using Fsdb: |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
these programs provide a higher level interface than plain Perl, so |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=over 4 |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item ** |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Fewer lines of simpler code: |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Picks out just one type of experiment and computes statistics on it, |
|
115
|
|
|
|
|
|
|
rather than: |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; } |
|
118
|
|
|
|
|
|
|
$mean = $sum / $n; $std_dev = ... |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
in dozens of places. |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
the library uses names for columns, so |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=over 4 |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item ** |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
No more C<$F[1]>, use C<_duration>. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item ** |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
New or different order columns? No changes to your scripts! |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=back |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Thus if your experiment gets more complicated with a size parameter, |
|
141
|
|
|
|
|
|
|
so your log changes to: |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#fsdb experiment size duration |
|
144
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.2 |
|
145
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.3 |
|
146
|
|
|
|
|
|
|
ufs_rcp_real 1024 264.5 |
|
147
|
|
|
|
|
|
|
ufs_rcp_real 1024 277.9 |
|
148
|
|
|
|
|
|
|
ufs_mab_sys 2048 45.3 |
|
149
|
|
|
|
|
|
|
ufs_mab_sys 2048 44.2 |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Then the previous scripts still work, even though duration is |
|
152
|
|
|
|
|
|
|
now the third column, not the second. |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=item * |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
A series of actions are self-documenting (each program records what it does). |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=over 4 |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item ** |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
No more wondering what hacks were used to compute the |
|
163
|
|
|
|
|
|
|
final data, just look at the comments at the end |
|
164
|
|
|
|
|
|
|
of the output. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=back |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
For example, the commands |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
add to the end of the output the lines |
|
173
|
|
|
|
|
|
|
# | dbrow _experiment eq "ufs_mab_sys" |
|
174
|
|
|
|
|
|
|
# | dbcolstats duration |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item * |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The library is mature, supporting large datasets, |
|
180
|
|
|
|
|
|
|
corner cases, error handling, backed by an automated test suite. |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=over 4 |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item ** |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
No more puzzling about bad output because your custom script |
|
187
|
|
|
|
|
|
|
skimped on error checking. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item ** |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
No more memory thrashing when you try to sort ten million records. |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item * |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Fsdb-2.x supports Perl scripting (in addition to shell scripting), |
|
198
|
|
|
|
|
|
|
with libraries to do Fsdb input and output, and easy support for pipelines. |
|
199
|
|
|
|
|
|
|
The shell script |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
dbcol name test1 | dbroweval '_test1 += 5;' |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
can be written in perl as: |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;')); |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=back |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
(The disadvantage is that you need to learn what functions Fsdb provides.) |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Fsdb is built on flat-ASCII databases. By storing data in simple text |
|
212
|
|
|
|
|
|
|
files and processing it with pipelines it is easy to experiment (in |
|
213
|
|
|
|
|
|
|
the shell) and look at the output. |
|
214
|
|
|
|
|
|
|
To the best of my knowledge, the original implementation of |
|
215
|
|
|
|
|
|
|
this idea was C</rdb>, a commercial product described in the book |
|
216
|
|
|
|
|
|
|
I<UNIX relational database management: application development in the UNIX environment> |
|
217
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and |
|
218
|
|
|
|
|
|
|
also at the web page L<http://www.rdb.com/>). Fsdb is an incompatible |
|
219
|
|
|
|
|
|
|
re-implementation of their idea without any accelerated indexing or |
|
220
|
|
|
|
|
|
|
forms support. (But it's free, and probably has better statistics!). |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Fsdb-2.x will exploit multiple processors or cores, |
|
223
|
|
|
|
|
|
|
and provides Perl-level support for input, output, and threaded-pipelines. |
|
224
|
|
|
|
|
|
|
(As of Fsdb-2.44 it no longer uses Perl threading, just processes.) |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Installation instructions follow at the end of this document. |
|
227
|
|
|
|
|
|
|
Fsdb-2.x requires Perl 5.8 to run. |
|
228
|
|
|
|
|
|
|
All commands have manual pages and provide usage with the C<--help> option. |
|
229
|
|
|
|
|
|
|
All commands are backed by an automated test suite. |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
The most recent version of Fsdb is available on the web at |
|
232
|
|
|
|
|
|
|
L<http://www.isi.edu/~johnh/SOFTWARE/FSDB/>. |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 2.62, 2016-11-29 |
|
238
|
|
|
|
|
|
|
A new L<yaml_to_db> and other minor improvements. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=over 4 |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
Documentation for L now includes sample output. |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=item NEW |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
L<yaml_to_db> converts a specific form of YAML to fsdb. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=item BUG FIX |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
The test suite now uses C rather than C |
|
253
|
|
|
|
|
|
|
to make OpenBSD-5.9 happier, I hope. |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Comments that log operations at the end of each file now do simple |
|
258
|
|
|
|
|
|
|
quoting of spaces. (It is not guaranteed to be fully shell-compliant.) |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
There is a new standard option, C<--header>, |
|
263
|
|
|
|
|
|
|
allowing one to specify an Fsdb header for inputs that lack it. |
|
264
|
|
|
|
|
|
|
Currently it is supported by L, |
|
265
|
|
|
|
|
|
|
L, L, L, L, |
|
266
|
|
|
|
|
|
|
L. |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=back |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head1 README CONTENTS |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=over 4 |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item executive summary |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item what's new |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item README CONTENTS |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item installation |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item basic data format |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item basic data manipulation |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=item list of commands |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=item another example |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=item a gradebook example |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=item a password example |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=item history |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item related work |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=item release notes |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item copyright |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=item comments |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=back |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Fsdb now uses the standard Perl build and installation from |
|
312
|
|
|
|
|
|
|
ExtUtils::MakeMaker(3), so the quick answer to installation is to type: |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
perl Makefile.PL |
|
315
|
|
|
|
|
|
|
make |
|
316
|
|
|
|
|
|
|
make test |
|
317
|
|
|
|
|
|
|
make install |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Or, if you want to install it somewhere else, change the first line to |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
perl Makefile.PL PREFIX=$HOME |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
and it will go in your home directory's F<bin>, etc. |
|
324
|
|
|
|
|
|
|
(See L<ExtUtils::MakeMaker(3)> for more details.) |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Fsdb requires perl 5.8 or later. |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
A test-suite is available, run it with |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
make test |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
A FreeBSD port to Fsdb is available, see |
|
333
|
|
|
|
|
|
|
L. |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
A Fink (MacOS X) port is available, see |
|
336
|
|
|
|
|
|
|
L. |
|
337
|
|
|
|
|
|
|
(Thanks to Lars Eggert for maintaining this port.) |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head1 BASIC DATA FORMAT |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
These programs are based on the idea of storing data in simple ASCII |
|
343
|
|
|
|
|
|
|
files. A database is a file with one header line and then data or |
|
344
|
|
|
|
|
|
|
comment lines. For example: |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
#fsdb account passwd uid gid fullname homedir shell |
|
347
|
|
|
|
|
|
|
johnh * 2274 134 John_Heidemann /home/johnh /bin/bash |
|
348
|
|
|
|
|
|
|
greg * 2275 134 Greg_Johnson /home/greg /bin/bash |
|
349
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
|
350
|
|
|
|
|
|
|
# this is a simple database |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
The header line must be first and begins with C<#fsdb>. |
|
353
|
|
|
|
|
|
|
There are rows (records) and columns (fields), |
|
354
|
|
|
|
|
|
|
just like in a normal database. |
|
355
|
|
|
|
|
|
|
Comment lines begin with C<#>. |
|
356
|
|
|
|
|
|
|
Column names are any string not containing spaces or single quote |
|
357
|
|
|
|
|
|
|
(although it is prudent to keep them alphanumeric with underscore). |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
By default, columns are delimited by whitespace. |
|
360
|
|
|
|
|
|
|
With this default configuration, the contents of a field |
|
361
|
|
|
|
|
|
|
cannot contain whitespace. |
|
362
|
|
|
|
|
|
|
However, this limitation can be relaxed by changing the field separator |
|
363
|
|
|
|
|
|
|
as described below. |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
The big advantage of simple flat-text databases is that |
|
366
|
|
|
|
|
|
|
it is usually easy to massage data into this format, |
|
367
|
|
|
|
|
|
|
and it's reasonably easy to take data out of this |
|
368
|
|
|
|
|
|
|
format into other (text-based) programs, like gnuplot, jgraph, and |
|
369
|
|
|
|
|
|
|
LaTeX. Think Unix. Think pipes. |
|
370
|
|
|
|
|
|
|
(Or even output to Excel and HTML if you prefer.) |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
Since no-whitespace in columns was a problem for some applications, |
|
373
|
|
|
|
|
|
|
there's an option which relaxes this rule. You can specify the field |
|
374
|
|
|
|
|
|
|
separator in the table header with C<-F x> where C<x> is |
|
375
|
|
|
|
|
|
|
a code for the new field separator. |
|
376
|
|
|
|
|
|
|
A full list of codes is at L<dbfilealter(1)>, |
|
377
|
|
|
|
|
|
|
but two common special values are C<-F t> |
|
378
|
|
|
|
|
|
|
which is a separator of a single tab character, |
|
379
|
|
|
|
|
|
|
and C<-F S>, a separator of two spaces. |
|
380
|
|
|
|
|
|
|
Both allow (single) spaces in fields. An example: |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
#fsdb -F S account passwd uid gid fullname homedir shell |
|
383
|
|
|
|
|
|
|
johnh * 2274 134 John Heidemann /home/johnh /bin/bash |
|
384
|
|
|
|
|
|
|
greg * 2275 134 Greg Johnson /home/greg /bin/bash |
|
385
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
|
386
|
|
|
|
|
|
|
# this is a simple database |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
See L<dbfilealter(1)> for more details. Regardless of what the column |
|
389
|
|
|
|
|
|
|
separator is for the body of the data, it's always whitespace in the |
|
390
|
|
|
|
|
|
|
header. |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
There's also a third format: a "list". Because it's often hard to see |
|
393
|
|
|
|
|
|
|
what's in columns past the first two, in list format each "column" is on |
|
394
|
|
|
|
|
|
|
a separate line. The programs dblistize and dbcolize convert to and |
|
395
|
|
|
|
|
|
|
from this format, and all programs work with either format. |
|
396
|
|
|
|
|
|
|
The command |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
dbfilealter -R C < DATA/passwd.fsdb |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
outputs: |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
#fsdb -R C account passwd uid gid fullname homedir shell |
|
403
|
|
|
|
|
|
|
account: johnh |
|
404
|
|
|
|
|
|
|
passwd: * |
|
405
|
|
|
|
|
|
|
uid: 2274 |
|
406
|
|
|
|
|
|
|
gid: 134 |
|
407
|
|
|
|
|
|
|
fullname: John_Heidemann |
|
408
|
|
|
|
|
|
|
homedir: /home/johnh |
|
409
|
|
|
|
|
|
|
shell: /bin/bash |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
account: greg |
|
412
|
|
|
|
|
|
|
passwd: * |
|
413
|
|
|
|
|
|
|
uid: 2275 |
|
414
|
|
|
|
|
|
|
gid: 134 |
|
415
|
|
|
|
|
|
|
fullname: Greg_Johnson |
|
416
|
|
|
|
|
|
|
homedir: /home/greg |
|
417
|
|
|
|
|
|
|
shell: /bin/bash |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
account: root |
|
420
|
|
|
|
|
|
|
passwd: * |
|
421
|
|
|
|
|
|
|
uid: 0 |
|
422
|
|
|
|
|
|
|
gid: 0 |
|
423
|
|
|
|
|
|
|
fullname: Root |
|
424
|
|
|
|
|
|
|
homedir: /root |
|
425
|
|
|
|
|
|
|
shell: /bin/bash |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# this is a simple database |
|
428
|
|
|
|
|
|
|
# | dblistize |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
See L<dbfilealter(1)> for more details. |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 BASIC DATA MANIPULATION |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
A number of programs exist to manipulate databases. |
|
436
|
|
|
|
|
|
|
Complex functions can be made by stringing together commands |
|
437
|
|
|
|
|
|
|
with shell pipelines. For example, to print the home |
|
438
|
|
|
|
|
|
|
directories of everyone with ``john'' in their names, |
|
439
|
|
|
|
|
|
|
you would do: |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
The output might be: |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
#fsdb homedir |
|
446
|
|
|
|
|
|
|
/home/johnh |
|
447
|
|
|
|
|
|
|
/home/greg |
|
448
|
|
|
|
|
|
|
# this is a simple database |
|
449
|
|
|
|
|
|
|
# | dbrow _fullname =~ /John/ |
|
450
|
|
|
|
|
|
|
# | dbcol homedir |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
(Notice that comments are appended to the output listing each command, |
|
453
|
|
|
|
|
|
|
providing an automatic audit log.) |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
In addition to typical database functions (select, join, etc.) there |
|
456
|
|
|
|
|
|
|
are also a number of statistical functions. |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
The real power of Fsdb is that one can apply arbitrary code to rows |
|
459
|
|
|
|
|
|
|
to do powerful things. |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/' |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
converts "John_Heidemann" into "Heidemann,_John". |
|
464
|
|
|
|
|
|
|
Not too much more work could split fullname into firstname and lastname |
|
465
|
|
|
|
|
|
|
fields. |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head1 TALKING ABOUT COLUMNS |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
An advantage of Fsdb is that you can talk about columns by name |
|
471
|
|
|
|
|
|
|
(symbolically) rather than simply by their positions. So in the above |
|
472
|
|
|
|
|
|
|
example, C<dbcol homedir> pulled out the home directory column, and |
|
473
|
|
|
|
|
|
|
C<dbrow '_fullname =~ /John/'> matched against column fullname. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
In general, you can use the name of the column listed on the C<#fsdb> line |
|
476
|
|
|
|
|
|
|
to identify it in most programs, and _name to identify it in code. |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Some alternatives for flexibility: |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=over 4 |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item * |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Numeric values identify columns positionally, numbering from 0. |
|
485
|
|
|
|
|
|
|
So 0 or _0 is the first column, 1 is the second, etc. |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=item * |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
In code, _last_columnname gets the value from columnname's previous row. |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=back |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
See L<dbroweval(1)> for more details about writing code. |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head1 LIST OF COMMANDS |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Enough said. I'll summarize the commands, and then you can |
|
500
|
|
|
|
|
|
|
experiment. For a detailed description of each command, see a summary |
|
501
|
|
|
|
|
|
|
by running it with the argument C<--help> (or C<-?> if you prefer.) |
|
502
|
|
|
|
|
|
|
Full manual pages can be found by running the command |
|
503
|
|
|
|
|
|
|
with the argument C<--man>, or running the Unix command C<man dbcol> |
|
504
|
|
|
|
|
|
|
or whatever program you want. |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=head2 TABLE CREATION |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=over 4 |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=item dbcolcreate |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
add columns to a database |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item dbcoldefine |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
set the column headings for a non-Fsdb file |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=back |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=head2 TABLE MANIPULATION |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=over 4 |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=item dbcol |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
select columns from a table |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=item dbrow |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
select rows from a table |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=item dbsort |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sort rows based on a set of columns |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=item dbjoin |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
compute the natural join of two tables |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=item dbcolrename |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
rename a column |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=item dbcolmerge |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
merge two columns into one |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=item dbcolsplittocols |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
split one column into two or more columns |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=item dbcolsplittorows |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
split one column into multiple rows |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=item dbfilepivot |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
"pivots" a file, converting multiple rows |
|
559
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=item dbfilevalidate |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
check that db file doesn't have some common errors |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=back |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=head2 COMPUTATION AND STATISTICS |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=over 4 |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=item dbcolstats |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
compute statistics over a column (mean,etc.,optionally median) |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=item dbmultistats |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
group rows by some key value, then compute stats (mean, etc.) over each group |
|
578
|
|
|
|
|
|
|
(equivalent to dbmapreduce with dbcolstats as the reducer) |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=item dbmapreduce |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
group rows (map) and then apply an arbitrary function to each group (reduce) |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=item dbrvstatdiff |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
compare two sample distributions (mean/conf interval/T-test) |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=item dbcolmovingstats |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
compute moving statistics over a column of data |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=item dbcolstatscores |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
compute Z-scores and T-scores over one column of data |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=item dbcolpercentile |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
compute the rank or percentile of a column |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=item dbcolhisto |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
compute histograms over a column of data |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=item dbcolscorrelate |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
compute the coefficient of correlation over several columns |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=item dbcolsregression |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
compute linear regression and correlation for two columns |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=item dbrowaccumulate |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
compute a running sum over a column of data |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=item dbrowcount |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
count the number of rows (a subset of dbcolstats) |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=item dbrowdiff |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
compute differences in a column between rows of a table |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=item dbrowenumerate |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
number each row |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=item dbroweval |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
run arbitrary Perl code on each row |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
=item dbrowuniq |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
count/eliminate identical rows (like Unix uniq(1)) |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=item dbfilediff |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
compare fields on rows of a file (something like Unix diff(1)) |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=back |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=head2 OUTPUT CONTROL |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=over 4 |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=item dbcolneaten |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
pretty-print columns |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=item dbfilealter |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
convert between column or list format, or change the column separator |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=item dbfilestripcomments |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
remove comments from a table |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item dbformmail |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
generate a script that sends form mail based on each row |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=back |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=head2 CONVERSIONS |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
(These programs convert data into fsdb. See their web pages for details.) |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=over 4 |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
=item cgi_to_db |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
L |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=item combined_log_format_to_db |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
L |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=item html_table_to_db |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
HTML tables to fsdb (assuming they're reasonably formatted). |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item kitrace_to_db |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
L |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=item ns_to_db |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
L |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=item sqlselect_to_db |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
the output of SQL SELECT tables to db |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=item tabdelim_to_db |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
spreadsheet tab-delimited files to db |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=item tcpdump_to_db |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
(see man tcpdump(8) on any reasonable system) |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=item xml_to_db |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
XML input to fsdb, assuming they're very regular |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
=back |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
(And out of fsdb:) |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=over 4 |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=item db_to_csv |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
Comma-separated-value format from fsdb. |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
=item db_to_html_table |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
simple conversion of Fsdb to html tables |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=back |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
=head2 STANDARD OPTIONS |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
Many programs have common options: |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=over 4 |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
=item B<-?> or B<--help> |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
Show basic usage. |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=item B<-N> or B<--new-name> |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
When a command creates a new column like L<dbrowaccumulate>'s C<accum>, |
|
736
|
|
|
|
|
|
|
this option lets one override the default name of that new column. |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
=item B<-T TmpDir> |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
where to put tmp files. |
|
741
|
|
|
|
|
|
|
Also uses environment variable TMPDIR, if -T is |
|
742
|
|
|
|
|
|
|
not specified. |
|
743
|
|
|
|
|
|
|
Default is /tmp. |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
Show basic usage. |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=item B<-c FRACTION> or B<--confidence FRACTION> |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
Specify confidence interval FRACTION (L, L, etc.) |
|
750
|
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=item B<-C S> or B<--element-separator S> |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Specify column separator S (L, L). |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=item B<-d> or B<--debug> |
|
756
|
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
Enable debugging (may be repeated for greater effect in some cases). |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=item B<-a> or B<--include-non-numeric> |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
Compute stats over all data (treating non-numbers as zeros). |
|
762
|
|
|
|
|
|
|
(By default, things that can't be treated as numbers |
|
763
|
|
|
|
|
|
|
are ignored for stats purposes) |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item B<-S> or B<--pre-sorted> |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
Assume the data is pre-sorted. |
|
768
|
|
|
|
|
|
|
May be repeated to disable verification (saving a small amount of work). |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=item B<-e E> or B<--empty E> |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
give value E as the value for empty (null) records |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=item B<-i I> or B<--input I> |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
Input data from file I. |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=item B<-o O> or B<--output O> |
|
779
|
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
Write data out to file O. |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=item B<--header> H |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
Use H as the full Fsdb header, rather than reading a header from |
|
785
|
|
|
|
|
|
|
the input. This option is particularly useful when using Fsdb |
|
786
|
|
|
|
|
|
|
under Hadoop, where split files don't have headers. |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=item B<--nolog> |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
Skip logging the program in a trailing comment. |
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
=back |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
When giving Perl code (in L<dbrow> and L<dbroweval>) |
|
795
|
|
|
|
|
|
|
column names can be embedded if preceded by underscores. |
|
796
|
|
|
|
|
|
|
(Look at L<dbrow(1)> or L<dbroweval(1)> for examples.) |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
Most programs run in constant memory and use temporary files if necessary. |
|
799
|
|
|
|
|
|
|
Exceptions are L, L, L, |
|
800
|
|
|
|
|
|
|
L, L. |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
=head1 ANOTHER EXAMPLE |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
Take the raw data in C<DATA/http_bandwidth>, |
|
806
|
|
|
|
|
|
|
put a header on it (C<dbcoldefine size bw>), |
|
807
|
|
|
|
|
|
|
take statistics of each category (C<dbmultistats -k size bw>), |
|
808
|
|
|
|
|
|
|
pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get: |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
|
811
|
|
|
|
|
|
|
1024 1.4962e+06 2.8497e+05 19.047 |
|
812
|
|
|
|
|
|
|
10240 5.0286e+06 6.0103e+05 11.952 |
|
813
|
|
|
|
|
|
|
102400 4.9216e+06 3.0939e+05 6.2863 |
|
814
|
|
|
|
|
|
|
# | dbcoldefine size bw |
|
815
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbmultistats -k size bw |
|
816
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
(The whole command was: |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
cat DATA/http_bandwidth | |
|
821
|
|
|
|
|
|
|
dbcoldefine size bw | |
|
822
|
|
|
|
|
|
|
dbmultistats -k size bw | |
|
823
|
|
|
|
|
|
|
dbcol size mean stddev pct_rsd |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
all on one line.) |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Then post-process them to get rid of the exponential notation |
|
828
|
|
|
|
|
|
|
by adding this to the end of the pipeline: |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);' |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
(Actually, this step is no longer required since L<dbcolstats> |
|
833
|
|
|
|
|
|
|
now uses a different default format.) |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
giving: |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
|
838
|
|
|
|
|
|
|
1024 1496200 284970 19.047 |
|
839
|
|
|
|
|
|
|
10240 5028600 601030 11.952 |
|
840
|
|
|
|
|
|
|
102400 4921600 309390 6.2863 |
|
841
|
|
|
|
|
|
|
# | dbcoldefine size bw |
|
842
|
|
|
|
|
|
|
# | dbmultistats -k size bw |
|
843
|
|
|
|
|
|
|
# | dbcol size mean stddev pct_rsd |
|
844
|
|
|
|
|
|
|
# | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); } |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
In a few lines, raw data is transformed to processed output. |
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
Suppose you expect there is an odd distribution of results of one |
|
850
|
|
|
|
|
|
|
datapoint. Fsdb can easily produce a CDF (cumulative distribution |
|
851
|
|
|
|
|
|
|
function) of the data, suitable for graphing: |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
cat DB/DATA/http_bandwidth | \ |
|
854
|
|
|
|
|
|
|
dbcoldefine size bw | \ |
|
855
|
|
|
|
|
|
|
dbrow '_size == 102400' | \ |
|
856
|
|
|
|
|
|
|
dbcol bw | \ |
|
857
|
|
|
|
|
|
|
dbsort -n bw | \ |
|
858
|
|
|
|
|
|
|
dbrowenumerate | \ |
|
859
|
|
|
|
|
|
|
dbcolpercentile count | \ |
|
860
|
|
|
|
|
|
|
dbcol bw percentile | \ |
|
861
|
|
|
|
|
|
|
xgraph |
|
862
|
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
The steps, roughly: |
|
864
|
|
|
|
|
|
|
1. get the raw input data and turn it into fsdb format, |
|
865
|
|
|
|
|
|
|
2. pick out just the relevant column (for efficiency) and sort it, |
|
866
|
|
|
|
|
|
|
3. for each data point, assign a CDF percentage to it, |
|
867
|
|
|
|
|
|
|
4. pick out the two columns to graph and show them |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=head1 A GRADEBOOK EXAMPLE |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
The first commercial program I wrote was a gradebook, |
|
873
|
|
|
|
|
|
|
so here's how to do it with Fsdb. |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
Format your data like DATA/grades. |
|
876
|
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
#fsdb name email id test1 |
|
878
|
|
|
|
|
|
|
a a@ucla.example.edu 1 80 |
|
879
|
|
|
|
|
|
|
b b@usc.example.edu 2 70 |
|
880
|
|
|
|
|
|
|
c c@isi.example.edu 3 65 |
|
881
|
|
|
|
|
|
|
d d@lmu.example.edu 4 90 |
|
882
|
|
|
|
|
|
|
e e@caltech.example.edu 5 70 |
|
883
|
|
|
|
|
|
|
f f@oxy.example.edu 6 90 |
|
884
|
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
Or if your students have spaces in their names, use C<-F S> and two spaces |
|
886
|
|
|
|
|
|
|
to separate each column: |
|
887
|
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
#fsdb -F S name email id test1 |
|
889
|
|
|
|
|
|
|
alfred aho a@ucla.example.edu 1 80 |
|
890
|
|
|
|
|
|
|
butler lampson b@usc.example.edu 2 70 |
|
891
|
|
|
|
|
|
|
david clark c@isi.example.edu 3 65 |
|
892
|
|
|
|
|
|
|
constantine drovolis d@lmu.example.edu 4 90 |
|
893
|
|
|
|
|
|
|
deborah estrin e@caltech.example.edu 5 70 |
|
894
|
|
|
|
|
|
|
sally floyd f@oxy.example.edu 6 90 |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
To compute statistics on an exam, do |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
cat DATA/grades | dbstats test1 |dblistize |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
giving |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
#fsdb -R C ... |
|
903
|
|
|
|
|
|
|
mean: 77.5 |
|
904
|
|
|
|
|
|
|
stddev: 10.84 |
|
905
|
|
|
|
|
|
|
pct_rsd: 13.987 |
|
906
|
|
|
|
|
|
|
conf_range: 11.377 |
|
907
|
|
|
|
|
|
|
conf_low: 66.123 |
|
908
|
|
|
|
|
|
|
conf_high: 88.877 |
|
909
|
|
|
|
|
|
|
conf_pct: 0.95 |
|
910
|
|
|
|
|
|
|
sum: 465 |
|
911
|
|
|
|
|
|
|
sum_squared: 36625 |
|
912
|
|
|
|
|
|
|
min: 65 |
|
913
|
|
|
|
|
|
|
max: 90 |
|
914
|
|
|
|
|
|
|
n: 6 |
|
915
|
|
|
|
|
|
|
... |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
To do a histogram: |
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
cat DATA/grades | dbcolhisto -n 5 -g test1 |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
giving |
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
#fsdb low histogram |
|
924
|
|
|
|
|
|
|
65 * |
|
925
|
|
|
|
|
|
|
70 ** |
|
926
|
|
|
|
|
|
|
75 |
|
927
|
|
|
|
|
|
|
80 * |
|
928
|
|
|
|
|
|
|
85 |
|
929
|
|
|
|
|
|
|
90 ** |
|
930
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1 |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Now you want to send out grades to the students by e-mail. |
|
933
|
|
|
|
|
|
|
Create a form-letter (in the file F<test1.txt>): |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
To: _email (_name) |
|
936
|
|
|
|
|
|
|
From: J. Random Professor |
|
937
|
|
|
|
|
|
|
Subject: test1 scores |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
_name, your score on test1 was _test1. |
|
940
|
|
|
|
|
|
|
86+ A |
|
941
|
|
|
|
|
|
|
75-85 B |
|
942
|
|
|
|
|
|
|
70-74 C |
|
943
|
|
|
|
|
|
|
0-69 F |
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
Generate the shell script that will send the mail out: |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt > test1.sh |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
And run it: |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sh <test1.sh
|
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
The last two steps can be combined: |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt | sh |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
but I like to keep a copy of exactly what I send. |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
At the end of the semester you'll want to compute grade totals and |
|
961
|
|
|
|
|
|
|
assign letter grades. Both fall out of dbroweval. |
|
962
|
|
|
|
|
|
|
For example, to compute weighted total grades with a 40% midterm/60% |
|
963
|
|
|
|
|
|
|
final where the midterm is 84 possible points and the final 100: |
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
dbcol -rv total | |
|
966
|
|
|
|
|
|
|
dbcolcreate total - | |
|
967
|
|
|
|
|
|
|
dbroweval ' |
|
968
|
|
|
|
|
|
|
_total = .40 * _midterm/84.0 + .60 * _final/100.0; |
|
969
|
|
|
|
|
|
|
_total = sprintf("%4.2f", _total); |
|
970
|
|
|
|
|
|
|
if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' | |
|
971
|
|
|
|
|
|
|
dbcolneaten |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
If you got the data originally from a spreadsheet, save it in |
|
975
|
|
|
|
|
|
|
"tab-delimited" format and convert it with tabdelim_to_db |
|
976
|
|
|
|
|
|
|
(run tabdelim_to_db -? for examples). |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
=head1 A PASSWORD EXAMPLE |
|
980
|
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
To convert the Unix password file to db: |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
cat /etc/passwd | sed 's/:/ /g'| \ |
|
984
|
|
|
|
|
|
|
dbcoldefine -F S login password uid gid gecos home shell \ |
|
985
|
|
|
|
|
|
|
>passwd.fsdb |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
To convert the group file |
|
988
|
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
cat /etc/group | sed 's/:/ /g' | \ |
|
990
|
|
|
|
|
|
|
dbcoldefine -F S group password gid members \ |
|
991
|
|
|
|
|
|
|
>group.fsdb |
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
To show the names of the groups that div7-members are in |
|
994
|
|
|
|
|
|
|
(assuming DIV7 is in the gecos field): |
|
995
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \ |
|
997
|
|
|
|
|
|
|
dbjoin -i - -i group.fsdb gid | dbcol login group |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
=head1 SHORT EXAMPLES |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
Which Fsdb programs are the most complicated (based on number of test cases)? |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
ls TEST/*.cmd | \ |
|
1005
|
|
|
|
|
|
|
dbcoldefine test | \ |
|
1006
|
|
|
|
|
|
|
dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \ |
|
1007
|
|
|
|
|
|
|
dbrowuniq -c | \ |
|
1008
|
|
|
|
|
|
|
dbsort -nr count | \ |
|
1009
|
|
|
|
|
|
|
dbcolneaten |
|
1010
|
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
(Answer: L, then L, L and L.) |
|
1012
|
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)? |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
cat $FILE | dbcolstats -q 4 $COLUMN | dblistize | dbstripcomments |
|
1017
|
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming |
|
1022
|
|
|
|
|
|
|
there's a common student id in column "id": |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
dbcol id hw1 <hw1.fsdb >t.fsdb |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
dbjoin -a -e - grades.fsdb t.fsdb id | \ |
|
1027
|
|
|
|
|
|
|
dbsort name | \ |
|
1028
|
|
|
|
|
|
|
dbcolneaten >new_grades.fsdb |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
Merging two fsdb files with the same rows: |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb >output.fsdb |
|
1034
|
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
or if you want to clean things up a bit |
|
1036
|
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
or if you want to know where the data came from |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
for i in 1 2 |
|
1042
|
|
|
|
|
|
|
do |
|
1043
|
|
|
|
|
|
|
dbcolcreate source $i < file$i.fsdb |
|
1044
|
|
|
|
|
|
|
done >output.fsdb |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
(assumes you're using a Bourne-shell compatible shell, not csh). |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
=head1 WARNINGS |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
As with any tool, one should (which means I) understand |
|
1052
|
|
|
|
|
|
|
the limits of the tool. |
|
1053
|
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
All Fsdb tools should run in I<constant memory>. |
|
1055
|
|
|
|
|
|
|
In some cases (such as F<dbcolstats> with quartiles, where the whole input |
|
1056
|
|
|
|
|
|
|
must be re-read), programs will spool data to disk if necessary. |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
Most tools buffer one or a few lines of data, so memory |
|
1059
|
|
|
|
|
|
|
will scale with the size of each line. |
|
1060
|
|
|
|
|
|
|
(So lines with many columns, or when columns have lots of data, |
|
1061
|
|
|
|
|
|
|
may cause large memory consumption.) |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
All Fsdb tools should run in constant or at worst C<n log n> time. |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
All Fsdb tools use normal Perl math routines for computation. |
|
1066
|
|
|
|
|
|
|
Although I make every attempt to choose numerically stable algorithms |
|
1067
|
|
|
|
|
|
|
(although I also welcome feedback and suggestions for improvement), |
|
1068
|
|
|
|
|
|
|
normal rounding due to computer floating point approximations |
|
1069
|
|
|
|
|
|
|
can result in inaccuracies when data spans a large range of precision. |
|
1070
|
|
|
|
|
|
|
(See for example the F<dbcolstats_extrema> test cases.) |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
Any requirements and limitations of each Fsdb tool |
|
1073
|
|
|
|
|
|
|
are documented on its manual page. |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
If any Fsdb program violates these assumptions, |
|
1076
|
|
|
|
|
|
|
that is a bug that should be documented |
|
1077
|
|
|
|
|
|
|
on the tool's manual page or ideally fixed. |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have |
|
1080
|
|
|
|
|
|
|
some bugs. Fsdb should work on perl from version 5.10 onward. |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
=head1 HISTORY |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
There have been three versions of Fsdb; |
|
1086
|
|
|
|
|
|
|
fsdb 1.0 is a complete re-write of the pre-1995 versions, |
|
1087
|
|
|
|
|
|
|
and was |
|
1088
|
|
|
|
|
|
|
distributed from 1995 to 2007. |
|
1089
|
|
|
|
|
|
|
Fsdb 2.0 is a significant re-write of the 1.x versions |
|
1090
|
|
|
|
|
|
|
for reasons described below. |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
Fsdb (in its various forms) has been used extensively by its author |
|
1093
|
|
|
|
|
|
|
since 1991. Since 1995 it's been used by two other researchers at |
|
1094
|
|
|
|
|
|
|
UCLA and several at ISI. In February 1998 it was announced to the |
|
1095
|
|
|
|
|
|
|
Internet. Since then it has found a few users, some outside where I |
|
1096
|
|
|
|
|
|
|
work. |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=head2 Fsdb 2.0 Rationale |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
I've thought about fsdb-2.0 for many years, but it was started |
|
1101
|
|
|
|
|
|
|
in earnest in 2007. Fsdb-2.0 has the following goals: |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=over 4 |
|
1104
|
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
=item in-one-process processing |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
While fsdb is great on the Unix command line as a pipeline between |
|
1108
|
|
|
|
|
|
|
programs, it should I<also> be possible to set it up to run in a single |
|
1109
|
|
|
|
|
|
|
process. And if it does so, it should be able to avoid serializing |
|
1110
|
|
|
|
|
|
|
and deserializing (converting to and from text) data between each module. |
|
1111
|
|
|
|
|
|
|
(Accomplished in fsdb-2.0: see L<dbpipeline>, although still needs tuning.) |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
=item clean IO API |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is |
|
1116
|
|
|
|
|
|
|
very, very crufty. More than just being ugly (but it was that too), |
|
1117
|
|
|
|
|
|
|
this made things like reading from one format file and writing to another |
|
1118
|
|
|
|
|
|
|
the application's job, when it should be the library's. |
|
1119
|
|
|
|
|
|
|
(Accomplished in fsdb-1.15 and improved in 2.0: see L<Fsdb::IO>.) |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
=item normalized module APIs |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
Because fsdb modules were added as needed over 10 years, |
|
1124
|
|
|
|
|
|
|
sometimes the module APIs became inconsistent. |
|
1125
|
|
|
|
|
|
|
(For example, the 1.x C<dbcolcreate> required an empty |
|
1126
|
|
|
|
|
|
|
value following the name of the new column, |
|
1127
|
|
|
|
|
|
|
but other programs specify empty values with the C<-e> argument.) |
|
1128
|
|
|
|
|
|
|
We should smooth over these inconsistencies. |
|
1129
|
|
|
|
|
|
|
(Accomplished as each module was ported in 2.0 through 2.7.) |
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
=item everyone handles all input formats |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
Given a clean IO API, the distinction between "colized" |
|
1134
|
|
|
|
|
|
|
and "listized" fsdb files should go away. Any program |
|
1135
|
|
|
|
|
|
|
should be able to read and write files in any format. |
|
1136
|
|
|
|
|
|
|
(Accomplished in fsdb-2.1.) |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=back |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Fsdb-2.0 preserves backwards compatibility where possible, |
|
1141
|
|
|
|
|
|
|
but breaks it where necessary to accomplish the above goals. |
|
1142
|
|
|
|
|
|
|
In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions. |
|
1143
|
|
|
|
|
|
|
Benchmarking in 2013 showed that threading performed much worse than |
|
1144
|
|
|
|
|
|
|
just using pipes, so Fsdb-2.44 uses threading "style", |
|
1145
|
|
|
|
|
|
|
but implemented with processes (via my "Freds" library). |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
=head2 Contributors |
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Fsdb includes code ported from Geoff Kuenning (C). |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
Fsdb contributors: |
|
1152
|
|
|
|
|
|
|
Ashvin Goel F, |
|
1153
|
|
|
|
|
|
|
Geoff Kuenning F, |
|
1154
|
|
|
|
|
|
|
Vikram Visweswariah F, |
|
1155
|
|
|
|
|
|
|
Kannan Varadhan F, |
|
1156
|
|
|
|
|
|
|
Lars Eggert F, |
|
1157
|
|
|
|
|
|
|
Arkadi Gelfond F, |
|
1158
|
|
|
|
|
|
|
David Graff F, |
|
1159
|
|
|
|
|
|
|
Haobo Yu F, |
|
1160
|
|
|
|
|
|
|
Pavlin Radoslavov F, |
|
1161
|
|
|
|
|
|
|
Graham Phillips, |
|
1162
|
|
|
|
|
|
|
Yuri Pradkin, |
|
1163
|
|
|
|
|
|
|
Alefiya Hussain, |
|
1164
|
|
|
|
|
|
|
Ya Xu, |
|
1165
|
|
|
|
|
|
|
Michael Schwendt, |
|
1166
|
|
|
|
|
|
|
Fabio Silva F, |
|
1167
|
|
|
|
|
|
|
Jerry Zhao F, |
|
1168
|
|
|
|
|
|
|
Ning Xu F, |
|
1169
|
|
|
|
|
|
|
Martin Lukac F, |
|
1170
|
|
|
|
|
|
|
Xue Cai, |
|
1171
|
|
|
|
|
|
|
Michael McQuaid, |
|
1172
|
|
|
|
|
|
|
Christopher Meng, |
|
1173
|
|
|
|
|
|
|
Calvin Ardi, |
|
1174
|
|
|
|
|
|
|
H. Merijn Brand, |
|
1175
|
|
|
|
|
|
|
Lan Wei. |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
Fsdb includes datasets contributed from NIST (F), |
|
1178
|
|
|
|
|
|
|
from |
|
1179
|
|
|
|
|
|
|
L, |
|
1180
|
|
|
|
|
|
|
the NIST/SEMATECH e-Handbook of Statistical Methods, section |
|
1181
|
|
|
|
|
|
|
1.4.2.8.1. Background and Data. The source is public domain, and |
|
1182
|
|
|
|
|
|
|
reproduced with permission. |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
=head1 RELATED WORK |
|
1188
|
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
As stated in the introduction, Fsdb is an incompatible reimplementation |
|
1190
|
|
|
|
|
|
|
of the ideas found in C</rdb>. By storing data in simple text files and |
|
1191
|
|
|
|
|
|
|
processing it with pipelines it is easy to experiment (in the shell) |
|
1192
|
|
|
|
|
|
|
and look at the output. The original implementation of this idea was |
|
1193
|
|
|
|
|
|
|
/rdb, a commercial product described in the book I<UNIX relational
|
|
1194
|
|
|
|
|
|
|
database management: application development in the UNIX environment> |
|
1195
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web |
|
1196
|
|
|
|
|
|
|
page L). |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
While Fsdb is inspired by Rdb, it includes no code from it, |
|
1199
|
|
|
|
|
|
|
and Fsdb makes several different design choices. |
|
1200
|
|
|
|
|
|
|
In particular: rdb attempts to be closer to a "real" database, |
|
1201
|
|
|
|
|
|
|
with provision for locking, file indexing. |
|
1202
|
|
|
|
|
|
|
Fsdb focuses on single user use and so eschews these choices. |
|
1203
|
|
|
|
|
|
|
Rdb also has some support for interactive editing. |
|
1204
|
|
|
|
|
|
|
Fsdb leaves editing to text editors like emacs or vi. |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
In August, 2002 I found out Carlo Strozzi extended RDB with his |
|
1207
|
|
|
|
|
|
|
package NoSQL L. According to |
|
1208
|
|
|
|
|
|
|
Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of |
|
1209
|
|
|
|
|
|
|
RDB. Although I haven't found Perl startup overhead to be a big |
|
1210
|
|
|
|
|
|
|
problem on my platforms (from old Sparcstation IPCs to 2GHz |
|
1211
|
|
|
|
|
|
|
Pentium-4s), you may want to evaluate his system. |
|
1212
|
|
|
|
|
|
|
The Linux Journal has a description of NoSQL |
|
1213
|
|
|
|
|
|
|
at L. |
|
1214
|
|
|
|
|
|
|
It seems quite similar to Fsdb. |
|
1215
|
|
|
|
|
|
|
Like /rdb, NoSQL supports indexing (not present in Fsdb). |
|
1216
|
|
|
|
|
|
|
Fsdb appears to have richer support for statistics, |
|
1217
|
|
|
|
|
|
|
and, as of Fsdb-2.x, its support for Perl threading may support |
|
1218
|
|
|
|
|
|
|
faster performance (one-process, less serialization and deserialization). |
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
=head1 RELEASE NOTES |
|
1222
|
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
Versions prior to 1.0 were released informally on my web page |
|
1224
|
|
|
|
|
|
|
but were not announced. |
|
1225
|
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
=head2 0.0 1991 |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
started for my own research use |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
=head2 0.1 26-May-94 |
|
1231
|
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
first check-in to RCS |
|
1233
|
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head2 0.2 15-Mar-95 |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
parts now require perl5 |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
=head2 1.0, 22-Jul-97 |
|
1239
|
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
adds autoconf support and a test script. |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
=head2 1.1, 20-Jan-98 |
|
1243
|
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
support for double space field separators, better tests |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=head2 1.2, 11-Feb-98 |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
minor changes and release on comp.lang.perl.announce |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
=head2 1.3, 17-Mar-98 |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=over 4 |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
=item * |
|
1255
|
|
|
|
|
|
|
adds median and quartile options to dbstats |
|
1256
|
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
=item * |
|
1259
|
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
adds dmalloc_to_db converter |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
=item * |
|
1264
|
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
fixes some warnings |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=item * |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
dbjoin now can run on unsorted input |
|
1271
|
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
=item * |
|
1274
|
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
fixes a dbjoin bug |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
=item * |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
some more tests in the test suite |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=back |
|
1283
|
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=head2 1.4, 27-Mar-98 |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=over 4 |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
=item * |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
improves error messages (all should now report the program that makes the error) |
|
1291
|
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
=item * |
|
1293
|
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
fixed a bug in dbstats output when the mean is zero |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=back |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
=head2 1.5, 25-Jun-98 |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=over 4 |
|
1301
|
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
=item BUG FIX |
|
1303
|
|
|
|
|
|
|
dbcolhisto, dbcolpercentile now handles non-numeric values like dbstats |
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=item NEW |
|
1306
|
|
|
|
|
|
|
dbcolstats computes zscores and tscores over a column |
|
1307
|
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
=item NEW |
|
1309
|
|
|
|
|
|
|
dbcolscorrelate computes correlation coefficients between two columns |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=item INTERNAL |
|
1312
|
|
|
|
|
|
|
ficus_getopt.pl has been replaced by DbGetopt.pm |
|
1313
|
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=item BUG FIX |
|
1315
|
|
|
|
|
|
|
all tests are now ``portable'' (previously some tests ran only on my system) |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
=item BUG FIX |
|
1318
|
|
|
|
|
|
|
you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond) |
|
1319
|
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
=item BUG FIX |
|
1321
|
|
|
|
|
|
|
installation no longer uses cp -f (to work on SunOS 4) |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
=back |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=head2 1.6, 24-May-99 |
|
1326
|
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
=over 4 |
|
1328
|
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
=item NEW |
|
1330
|
|
|
|
|
|
|
dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary) |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
=item NEW |
|
1333
|
|
|
|
|
|
|
dbcolmovingstats does moving means over a series of data |
|
1334
|
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
=item NEW |
|
1336
|
|
|
|
|
|
|
dbcol has a -v option to get all columns except those listed |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
=item NEW |
|
1339
|
|
|
|
|
|
|
dbmultistats does quartiles and medians |
|
1340
|
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
=item NEW |
|
1342
|
|
|
|
|
|
|
dbstripextraheaders now also cleans up bogus comments before the first header |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
=item BUG FIX |
|
1345
|
|
|
|
|
|
|
dbcolneaten works better with double-space-separated data |
|
1346
|
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=back |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=head2 1.7, 5-Jan-00 |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=over 4 |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=item NEW |
|
1354
|
|
|
|
|
|
|
dbcolize now detects and rejects lines that contain embedded copies of the field separator |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=item NEW |
|
1357
|
|
|
|
|
|
|
configure tries harder to prevent people from improperly configuring/installing fsdb |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=item NEW |
|
1360
|
|
|
|
|
|
|
tcpdump_to_db converter (incomplete) |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
=item NEW |
|
1363
|
|
|
|
|
|
|
tabdelim_to_db converter: from spreadsheet tab-delimited files to db |
|
1364
|
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=item NEW |
|
1366
|
|
|
|
|
|
|
mailing lists for fsdb are C and C |
|
1367
|
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message. |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=item BUG FIX |
|
1371
|
|
|
|
|
|
|
dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case. |
|
1372
|
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=item BUG FIX |
|
1374
|
|
|
|
|
|
|
the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency. |
|
1375
|
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
=item BUG FIX (DOCUMENTATION) |
|
1377
|
|
|
|
|
|
|
dbcolmovingstats had incorrect sample output. |
|
1378
|
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=back |
|
1380
|
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=head2 1.8, 28-Jun-00 |
|
1382
|
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
=over 4 |
|
1384
|
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
=item BUG FIX |
|
1386
|
|
|
|
|
|
|
header options are now preserved when writing with dblistize |
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
=item NEW |
|
1389
|
|
|
|
|
|
|
dbrowuniq now optionally checks for uniqueness only on certain fields |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=item NEW |
|
1392
|
|
|
|
|
|
|
dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
=item NEW |
|
1395
|
|
|
|
|
|
|
converter for "crl" format network traces |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
=item NEW |
|
1398
|
|
|
|
|
|
|
anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo. |
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=item OPTIMIZATION |
|
1401
|
|
|
|
|
|
|
comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.) |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
=item BUG FIX |
|
1404
|
|
|
|
|
|
|
dbstats with median/quartiles now correctly handles singleton data points. |
|
1405
|
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=back |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=head2 1.9, 6-Nov-00 |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=over 4 |
|
1411
|
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
=item NEW |
|
1413
|
|
|
|
|
|
|
dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov). |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=item BUG FIX |
|
1416
|
|
|
|
|
|
|
dbsort now works with perl-5.6 |
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=back |
|
1419
|
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=head2 1.10, 10-Apr-01 |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=over 4 |
|
1423
|
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=item BUG FIX |
|
1425
|
|
|
|
|
|
|
dbstats now handles the case where there are more n-tiles than data |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
=item NEW |
|
1428
|
|
|
|
|
|
|
dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu) |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=item BUG FIX |
|
1431
|
|
|
|
|
|
|
dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu) |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
=item BUG FIX |
|
1434
|
|
|
|
|
|
|
cleanup of temporary files is slightly better |
|
1435
|
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
=back |
|
1437
|
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=head2 1.11, 2-Nov-01 |
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=over 4 |
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
=item BUG FIX |
|
1443
|
|
|
|
|
|
|
dbcolneaten now runs in constant memory |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
=item NEW |
|
1446
|
|
|
|
|
|
|
dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
=item OPTIMIZATION |
|
1449
|
|
|
|
|
|
|
dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001) |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
=item INTERNAL |
|
1452
|
|
|
|
|
|
|
t_distr now ported to perl5 module DbTDistr |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=back |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
=head2 1.12, 30-Oct-02 |
|
1457
|
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=over 4 |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
=item BUG FIX |
|
1461
|
|
|
|
|
|
|
dbmultistats documentation typo fixed |
|
1462
|
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
=item NEW |
|
1464
|
|
|
|
|
|
|
dbcolmultiscale |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
=item NEW |
|
1467
|
|
|
|
|
|
|
dbcol has -r option for "relaxed error checking" |
|
1468
|
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
=item NEW |
|
1470
|
|
|
|
|
|
|
dbcolneaten has new -e option to strip end-of-line spaces |
|
1471
|
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
=item NEW |
|
1473
|
|
|
|
|
|
|
dbrow finally has a -v option to negate the test |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
=item BUG FIX |
|
1476
|
|
|
|
|
|
|
math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases) |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
=item BUG FIX |
|
1479
|
|
|
|
|
|
|
some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.) |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
=item BUG FIX |
|
1482
|
|
|
|
|
|
|
fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem) |
|
1483
|
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
=item NEW |
|
1485
|
|
|
|
|
|
|
db_to_html_table: simple conversion to html tables (NO fancy stuff) |
|
1486
|
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
=item NEW |
|
1488
|
|
|
|
|
|
|
dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML |
|
1489
|
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=back |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
=head2 1.13, 4-Feb-04 |
|
1494
|
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
=over 4 |
|
1497
|
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
=item NEW |
|
1499
|
|
|
|
|
|
|
fsdb added to the freebsd ports tree L. Maintainer: C |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
=item BUG FIX |
|
1502
|
|
|
|
|
|
|
properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C. |
|
1503
|
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
=item NEW |
|
1505
|
|
|
|
|
|
|
dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README. |
|
1506
|
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=item NEW |
|
1508
|
|
|
|
|
|
|
cgi_to_db converts CGI.pm-format storage to fsdb list format |
|
1509
|
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
=item BUG FIX |
|
1511
|
|
|
|
|
|
|
handle numeric synonyms for column names in dbcol properly |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1514
|
|
|
|
|
|
|
"talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert. |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
=item CHANGE |
|
1517
|
|
|
|
|
|
|
dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root) |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=item NEW |
|
1520
|
|
|
|
|
|
|
on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode |
|
1521
|
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
=item NEW |
|
1523
|
|
|
|
|
|
|
dbfilevalidate: check a db file for some common errors |
|
1524
|
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
=back |
|
1526
|
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=head2 1.14, 24-Aug-06 |
|
1529
|
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
=over 4 |
|
1531
|
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1534
|
|
|
|
|
|
|
README cleanup |
|
1535
|
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1537
|
|
|
|
|
|
|
dbcolsplit renamed dbcolsplittocols |
|
1538
|
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
=item NEW |
|
1540
|
|
|
|
|
|
|
dbcolsplittorows split one column into multiple rows |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=item NEW |
|
1543
|
|
|
|
|
|
|
dbcolsregression compute linear regression and correlation for two columns |
|
1544
|
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1546
|
|
|
|
|
|
|
csv_to_db: better error handling, normalize field names, skip blank lines |
|
1547
|
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1549
|
|
|
|
|
|
|
dbjoin now detects (and fails) if non-joined files have duplicate names |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
=item BUG FIX |
|
1552
|
|
|
|
|
|
|
minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors) |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
=back |
|
1555
|
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
=head2 1.15, 12-Nov-07 |
|
1557
|
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=over 4 |
|
1559
|
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
=item NEW |
|
1561
|
|
|
|
|
|
|
fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.) |
|
1562
|
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
=item NEW |
|
1564
|
|
|
|
|
|
|
Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4. |
|
1565
|
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
=item NEW |
|
1567
|
|
|
|
|
|
|
dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats. |
|
1568
|
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1570
|
|
|
|
|
|
|
fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace. |
|
1571
|
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1573
|
|
|
|
|
|
|
dblib.pl is now moved to Fsdb::Old.pm. |
|
1574
|
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
=item BUG FIX |
|
1576
|
|
|
|
|
|
|
dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!). |
|
1577
|
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1579
|
|
|
|
|
|
|
dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.) |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1582
|
|
|
|
|
|
|
adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way). |
|
1583
|
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
=item BUG FIX |
|
1585
|
|
|
|
|
|
|
dbmultistats memory usage corrected (O(# tags), not O(1)) |
|
1586
|
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1588
|
|
|
|
|
|
|
dbmultistats can now optionally run with pre-grouped input in O(1) memory |
|
1589
|
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1591
|
|
|
|
|
|
|
dbroweval -N was finally implemented (eat comments) |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
=back |
|
1594
|
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
=head2 2.0, 25-Jan-08 |
|
1596
|
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete) |
|
1598
|
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
=over 4 |
|
1600
|
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
=item ENHANCEMENT: |
|
1602
|
|
|
|
|
|
|
shifting old programs to Perl modules, with |
|
1603
|
|
|
|
|
|
|
the front-end program as just a wrapper. |
|
1604
|
|
|
|
|
|
|
In the short-term, this change just means programs have real man pages. |
|
1605
|
|
|
|
|
|
|
In the long-run, it will mean that one can run a pipeline in a single |
|
1606
|
|
|
|
|
|
|
Perl program. |
|
1607
|
|
|
|
|
|
|
So far: |
|
1608
|
|
|
|
|
|
|
L, |
|
1609
|
|
|
|
|
|
|
L, |
|
1610
|
|
|
|
|
|
|
the new L. |
|
1611
|
|
|
|
|
|
|
L |
|
1612
|
|
|
|
|
|
|
the new L, |
|
1613
|
|
|
|
|
|
|
the old C (renamed L), |
|
1614
|
|
|
|
|
|
|
L, |
|
1615
|
|
|
|
|
|
|
L, |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=item NEW: |
|
1618
|
|
|
|
|
|
|
L is an internal-only module that lets one |
|
1619
|
|
|
|
|
|
|
use fsdb commands from within perl (via threads). |
|
1620
|
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
It also provides perl function aliases for the internal modules, |
|
1622
|
|
|
|
|
|
|
so a string of fsdb commands in perl are nearly as terse as in the |
|
1623
|
|
|
|
|
|
|
shell: |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
use Fsdb::Filter::dbpipeline qw(:all); |
|
1626
|
|
|
|
|
|
|
dbpipeline( |
|
1627
|
|
|
|
|
|
|
dbrow(qw(name test1)), |
|
1628
|
|
|
|
|
|
|
dbroweval('_test1 += 5;') |
|
1629
|
|
|
|
|
|
|
); |
|
1630
|
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
|
1632
|
|
|
|
|
|
|
The old L has been renamed L. |
|
1633
|
|
|
|
|
|
|
The new L does the same thing as the old L. |
|
1634
|
|
|
|
|
|
|
This incompatibility is unfortunate but normalizes program names. |
|
1635
|
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
=item CHANGE: |
|
1637
|
|
|
|
|
|
|
The new L program |
|
1638
|
|
|
|
|
|
|
always outputs C<-> (the default empty value) for |
|
1639
|
|
|
|
|
|
|
statistics it cannot compute (for example, standard deviation |
|
1640
|
|
|
|
|
|
|
if there is only one row), |
|
1641
|
|
|
|
|
|
|
instead of the old mix of C<-> and "na". |
|
1642
|
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
|
1644
|
|
|
|
|
|
|
The old L program, now called L, |
|
1645
|
|
|
|
|
|
|
also has different arguments. The C<-t mean,stddev> option is now |
|
1646
|
|
|
|
|
|
|
C<--tmean mean --tstddev stddev>. See L for details. |
|
1647
|
|
|
|
|
|
|
|
|
1648
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
|
1649
|
|
|
|
|
|
|
L now assumes all new columns get the default |
|
1650
|
|
|
|
|
|
|
value rather than requiring each column to have an initial constant value. |
|
1651
|
|
|
|
|
|
|
To change the initial value, use the new C<-e> option. |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
=item NEW: |
|
1654
|
|
|
|
|
|
|
L counts rows, an almost-subset of L's C output |
|
1655
|
|
|
|
|
|
|
(except without differentiating numeric/non-numeric input), |
|
1656
|
|
|
|
|
|
|
or the equivalent of C. |
|
1657
|
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
=item NEW: |
|
1659
|
|
|
|
|
|
|
L merges two sorted files. |
|
1660
|
|
|
|
|
|
|
This functionality was previously embedded in L. |
|
1661
|
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
|
1663
|
|
|
|
|
|
|
L's C<-i> option to include non-matches |
|
1664
|
|
|
|
|
|
|
is now renamed C<-a>, so as to not conflict with the new |
|
1665
|
|
|
|
|
|
|
standard option C<-i> for input file. |
|
1666
|
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
=back |
|
1668
|
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
=head2 2.1, 6-Apr-08 |
|
1670
|
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format |
|
1672
|
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
=over 4 |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
=item ENHANCEMENT: |
|
1676
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
|
1677
|
|
|
|
|
|
|
New in 2.1: |
|
1678
|
|
|
|
|
|
|
L, |
|
1679
|
|
|
|
|
|
|
L, |
|
1680
|
|
|
|
|
|
|
L, |
|
1681
|
|
|
|
|
|
|
L, |
|
1682
|
|
|
|
|
|
|
L, |
|
1683
|
|
|
|
|
|
|
L |
|
1684
|
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1686
|
|
|
|
|
|
|
L now handles an arbitrary number of input files, |
|
1687
|
|
|
|
|
|
|
not just exactly two. |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
=item NEW |
|
1690
|
|
|
|
|
|
|
L is an internal routine that handles merging exactly two files. |
|
1691
|
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1693
|
|
|
|
|
|
|
L now specifies inputs like L, |
|
1694
|
|
|
|
|
|
|
rather than assuming the first two arguments were tables (as in fsdb-1). |
|
1695
|
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
The old L argument C<-i> is now C<-a> or C<--type=outer>. |
|
1697
|
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
A minor change: comments in the source files for |
|
1699
|
|
|
|
|
|
|
L are now intermixed with output |
|
1700
|
|
|
|
|
|
|
rather than being delayed until the end. |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1703
|
|
|
|
|
|
|
L now no longer produces warnings when null values are |
|
1704
|
|
|
|
|
|
|
passed to numeric comparisons. |
|
1705
|
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
=item BUG FIX |
|
1707
|
|
|
|
|
|
|
L now once again works with code that lacks a trailing semicolon. |
|
1708
|
|
|
|
|
|
|
(This bug fixes a regression from 1.15.) |
|
1709
|
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1711
|
|
|
|
|
|
|
L's old C<-e> option (to avoid end-of-line spaces) is now C<-E> |
|
1712
|
|
|
|
|
|
|
to avoid conflicts with the standard empty field argument. |
|
1713
|
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1715
|
|
|
|
|
|
|
L's old C<-e> option is now C<-E> to avoid conflicts. |
|
1716
|
|
|
|
|
|
|
And its C<-n>, C<-s>, and C<-w> are now |
|
1717
|
|
|
|
|
|
|
C<-N>, C<-S>, and C<-W> to correspond. |
|
1718
|
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
=item NEW |
|
1720
|
|
|
|
|
|
|
L replaces L, L, and L, |
|
1721
|
|
|
|
|
|
|
but with different options. |
|
1722
|
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1724
|
|
|
|
|
|
|
The library routines C now understand both list-format |
|
1725
|
|
|
|
|
|
|
and column-format data, so all converted programs can now |
|
1726
|
|
|
|
|
|
|
I read either format. This capability was one |
|
1727
|
|
|
|
|
|
|
of the milestone goals for 2.0, so yea! |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
=back |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
=head2 2.2, 23-May-08 |
|
1732
|
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
Release 2.2 is another 2.x alpha release. Now I<most> of the |
|
1734
|
|
|
|
|
|
|
commands are ported, but a few remain, and I plan one last |
|
1735
|
|
|
|
|
|
|
incompatible change (to the file header) before 2.x final. |
|
1736
|
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
=over 4 |
|
1738
|
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
|
1742
|
|
|
|
|
|
|
New in 2.2: |
|
1743
|
|
|
|
|
|
|
L, |
|
1744
|
|
|
|
|
|
|
L. |
|
1745
|
|
|
|
|
|
|
L. |
|
1746
|
|
|
|
|
|
|
L. |
|
1747
|
|
|
|
|
|
|
L. |
|
1748
|
|
|
|
|
|
|
L. |
|
1749
|
|
|
|
|
|
|
L. |
|
1750
|
|
|
|
|
|
|
L. |
|
1751
|
|
|
|
|
|
|
L. |
|
1752
|
|
|
|
|
|
|
L. |
|
1753
|
|
|
|
|
|
|
L. |
|
1754
|
|
|
|
|
|
|
Also |
|
1755
|
|
|
|
|
|
|
L |
|
1756
|
|
|
|
|
|
|
exists only as a front-end (command-line) program. |
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
|
1761
|
|
|
|
|
|
|
L, |
|
1762
|
|
|
|
|
|
|
L, |
|
1763
|
|
|
|
|
|
|
L, |
|
1764
|
|
|
|
|
|
|
L. |
|
1765
|
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
=item NEW |
|
1767
|
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
L to convert Apache logfiles |
|
1769
|
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1771
|
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
Options to L are now B<-B> and B<-I>, |
|
1773
|
|
|
|
|
|
|
not B<-a> and B<-i>. |
|
1774
|
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1776
|
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
L is now L. |
|
1778
|
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
=item BUG FIXES |
|
1780
|
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
L better handles empty columns; |
|
1782
|
|
|
|
|
|
|
L warning suppressed (actually a bug in high-bucket handling). |
|
1783
|
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1785
|
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
L now requires a C<-k> option in front of the |
|
1787
|
|
|
|
|
|
|
key (tag) field, or if none is given, it will group by the first field |
|
1788
|
|
|
|
|
|
|
(both like L). |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
=item KNOWN BUG |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
L with quantile option doesn't work currently. |
|
1793
|
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
L is renamed L. |
|
1797
|
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
=item BUG FIXES |
|
1799
|
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
L was leaving its log message as a command, not a comment. |
|
1801
|
|
|
|
|
|
|
Oops. No longer. |
|
1802
|
|
|
|
|
|
|
|
|
1803
|
|
|
|
|
|
|
=back |
|
1804
|
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
=head2 2.3, 27-May-08 (alpha) |
|
1806
|
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
Another alpha release, this one just to fix the critical dbjoin bug |
|
1808
|
|
|
|
|
|
|
listed below (that happens to have blocked my MP3 jukebox :-). |
|
1809
|
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
=over 4 |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
=item BUG FIX |
|
1813
|
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
Dbsort no longer hangs if given an input file with no rows. |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=item BUG FIX |
|
1817
|
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
Dbjoin now works with unsorted input coming from a pipeline (like stdin). |
|
1819
|
|
|
|
|
|
|
Perl-5.8.8 has a bug (?) that was making this case fail---opening |
|
1820
|
|
|
|
|
|
|
stdin in one thread, reading some, then reading more in a different |
|
1821
|
|
|
|
|
|
|
thread caused an lseek which works on files, but fails on pipes like stdin. |
|
1822
|
|
|
|
|
|
|
Go figure. |
|
1823
|
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
=item BUG FIX / KNOWN BUG |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
The dbjoin fix also fixed dbmultistats -q |
|
1827
|
|
|
|
|
|
|
(it now gives the right answer). |
|
1828
|
|
|
|
|
|
|
Although a new bug appeared, messages like: |
|
1829
|
|
|
|
|
|
|
Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction. |
|
1830
|
|
|
|
|
|
|
So the dbmultistats_quartile test is still disabled. |
|
1831
|
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
=back |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
=head2 2.4, 18-Jun-08 |
|
1835
|
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
Another alpha release, mostly to fix minor usability |
|
1837
|
|
|
|
|
|
|
problems in dbmapreduce and client functions. |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
=over 4 |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
L now defaults to running user supplied code without warnings |
|
1844
|
|
|
|
|
|
|
(as with fsdb-1.x). |
|
1845
|
|
|
|
|
|
|
Use C<--warnings> or C<-w> to turn them back on. |
|
1846
|
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
L can now write different format output |
|
1850
|
|
|
|
|
|
|
than the input, using the C<-m> option. |
|
1851
|
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
=item KNOWN BUG |
|
1853
|
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
L emits warnings on perl 5.10.0 |
|
1855
|
|
|
|
|
|
|
about "Unbalanced string table refcount" and "Scalars leaked" |
|
1856
|
|
|
|
|
|
|
when run with an external program as a reducer. |
|
1857
|
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
L emits the warning "Attempt to free unreferenced scalar" |
|
1859
|
|
|
|
|
|
|
when run with quartiles. |
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
In each case the output is correct. |
|
1862
|
|
|
|
|
|
|
I believe these can be ignored. |
|
1863
|
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
=item CHANGE |
|
1865
|
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
L no longer logs a line for each reducer that is invoked. |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
=back |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
=head2 2.5, 24-Jun-08 |
|
1872
|
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
Another alpha release, fixing more minor bugs in |
|
1874
|
|
|
|
|
|
|
C and lossage in C. |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=over 4 |
|
1877
|
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
L can now tolerate non-map-aware reducers |
|
1881
|
|
|
|
|
|
|
that pass back the key column in output. |
|
1882
|
|
|
|
|
|
|
It also passes the current key as the last argument to |
|
1883
|
|
|
|
|
|
|
external reducers. |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
=item BUG FIX |
|
1886
|
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
L, correctly handle C<-header> option again. |
|
1888
|
|
|
|
|
|
|
(Broken since fsdb-2.3.) |
|
1889
|
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
=back |
|
1891
|
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
=head2 2.6, 11-Jul-08 |
|
1894
|
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
Another alpha release, needed to fix DaGronk. |
|
1896
|
|
|
|
|
|
|
One new port, small bug fixes, and important fix to L. |
|
1897
|
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
=over 4 |
|
1899
|
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1901
|
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
|
1903
|
|
|
|
|
|
|
New in 2.6: |
|
1904
|
|
|
|
|
|
|
L. |
|
1905
|
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE and ENHANCEMENTS |
|
1907
|
|
|
|
|
|
|
L arguments changed, |
|
1908
|
|
|
|
|
|
|
use C<--rank> to require ranking instead of C<-r>. |
|
1909
|
|
|
|
|
|
|
Also, C<--ascending> and C<--descending> can now be specified separately, |
|
1910
|
|
|
|
|
|
|
both for C<--percentile> and C<--rank>. |
|
1911
|
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=item BUG FIX |
|
1913
|
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
Sigh, the sense of the --warnings option in L was inverted. No longer. |
|
1915
|
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
=item BUG FIX |
|
1917
|
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
I found and fixed the string leaks (errors like "Unbalanced string |
|
1919
|
|
|
|
|
|
|
table refcount" and "Scalars leaked") in L and L. |
|
1920
|
|
|
|
|
|
|
(All Cs in threads must be manually destroyed.) |
|
1921
|
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
=item BUG FIX |
|
1923
|
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
The C<-C> option to specify the column separator in L |
|
1925
|
|
|
|
|
|
|
now works again (broken since it was ported). |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
=back |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
=head2 2.7, 30-Jul-08 beta |
|
1930
|
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
The beta release of fsdb-2.x. Finally, all programs are ported. |
|
1932
|
|
|
|
|
|
|
As statistics, the number of lines of non-library code doubled from |
|
1933
|
|
|
|
|
|
|
7.5k to 15.5k. The libraries are much more complete, |
|
1934
|
|
|
|
|
|
|
going from 866 to 5164 lines. |
|
1935
|
|
|
|
|
|
|
The overall number of programs is about the same, |
|
1936
|
|
|
|
|
|
|
although 19 were dropped and 11 were added. |
|
1937
|
|
|
|
|
|
|
The number of test cases has grown from 116 to 175. |
|
1938
|
|
|
|
|
|
|
All programs are now in perl-5, no more shell scripts or perl-4. |
|
1939
|
|
|
|
|
|
|
All programs now have manual pages. |
|
1940
|
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
Although this is a major step forward, I still expect |
|
1942
|
|
|
|
|
|
|
to rename "jdb" to "fsdb". |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
=over 4 |
|
1945
|
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
|
1949
|
|
|
|
|
|
|
New in 2.7: |
|
1950
|
|
|
|
|
|
|
L. |
|
1951
|
|
|
|
|
|
|
L. |
|
1952
|
|
|
|
|
|
|
L. |
|
1953
|
|
|
|
|
|
|
L. |
|
1954
|
|
|
|
|
|
|
L. |
|
1955
|
|
|
|
|
|
|
L, |
|
1956
|
|
|
|
|
|
|
L, |
|
1957
|
|
|
|
|
|
|
L, |
|
1958
|
|
|
|
|
|
|
L, |
|
1959
|
|
|
|
|
|
|
L, |
|
1960
|
|
|
|
|
|
|
L. |
|
1961
|
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
1963
|
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
|
1965
|
|
|
|
|
|
|
L, |
|
1966
|
|
|
|
|
|
|
L, |
|
1967
|
|
|
|
|
|
|
L. |
|
1968
|
|
|
|
|
|
|
L. |
|
1969
|
|
|
|
|
|
|
They may come back, but seemed overly specialized. |
|
1970
|
|
|
|
|
|
|
The following program |
|
1971
|
|
|
|
|
|
|
L |
|
1972
|
|
|
|
|
|
|
was dropped because it is superseded by L. |
|
1973
|
|
|
|
|
|
|
L |
|
1974
|
|
|
|
|
|
|
was dropped pending test cases and examples. |
|
1975
|
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
L now has a C<-c> option to correct errors. |
|
1979
|
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
=item NEW |
|
1981
|
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
L provides the inverse of |
|
1983
|
|
|
|
|
|
|
L. |
|
1984
|
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
=back |
|
1986
|
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
=head2 2.8, 5-Aug-08 |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
Change header format, preserving forwards compatibility. |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
=over 4 |
|
1993
|
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=item BUG FIX |
|
1995
|
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
Complete editing pass over the manual, making sure it aligns |
|
1997
|
|
|
|
|
|
|
with fsdb-2.x. |
|
1998
|
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
=item SEMI-COMPATIBLE CHANGE |
|
2000
|
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
The header of fsdb files has changed, it is now #fsdb, not #h (or #L) |
|
2002
|
|
|
|
|
|
|
and parsing of -F and -R are also different. |
|
2003
|
|
|
|
|
|
|
See L for the new specification. |
|
2004
|
|
|
|
|
|
|
The v1 file format will be read, compatibly, but |
|
2005
|
|
|
|
|
|
|
not written. |
|
2006
|
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
=item BUG FIX |
|
2008
|
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
L now tolerates comments that precede the first key, |
|
2010
|
|
|
|
|
|
|
instead of failing with an error message. |
|
2011
|
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
=back |
|
2013
|
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
=head2 2.9, 6-Aug-08 |
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
Still in beta; just a quick bug-fix for L. |
|
2018
|
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
=over 4 |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2022
|
|
|
|
|
|
|
|
|
2023
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
|
2024
|
|
|
|
|
|
|
of input. |
|
2025
|
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
=back |
|
2027
|
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
=head2 2.10, 23-Sep-08 |
|
2029
|
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
|
2031
|
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
=over 4 |
|
2033
|
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2035
|
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
|
2037
|
|
|
|
|
|
|
of input. |
|
2038
|
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2040
|
|
|
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
L the warnings option was backwards; |
|
2042
|
|
|
|
|
|
|
now corrected. As a result, warnings in user code now default off |
|
2043
|
|
|
|
|
|
|
(like in fsdb-1.x). |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
=item BUG FIX |
|
2046
|
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
L now defaults to assuming the target column is numeric. |
|
2048
|
|
|
|
|
|
|
The new option C<-N> allows selection of a non-numeric target. |
|
2049
|
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
=item BUG FIX |
|
2051
|
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
L now includes C<--sample> and C<--nosample> options |
|
2053
|
|
|
|
|
|
|
to compute the sample or full population correlation coefficients. |
|
2054
|
|
|
|
|
|
|
Thanks to Xue Cai for finding this bug. |
|
2055
|
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
=back |
|
2057
|
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
|
|
2059
|
|
|
|
|
|
|
=head2 2.11, 14-Oct-08 |
|
2060
|
|
|
|
|
|
|
|
|
2061
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
|
2062
|
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
=over 4 |
|
2064
|
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2066
|
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
L is now more aggressive about filling in empty cells |
|
2068
|
|
|
|
|
|
|
with the official empty value, rather than leaving them blank or as whitespace. |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2071
|
|
|
|
|
|
|
|
|
2072
|
|
|
|
|
|
|
L now catches failures during pipeline element setup |
|
2073
|
|
|
|
|
|
|
and exits reasonably gracefully. |
|
2074
|
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=item BUG FIX |
|
2076
|
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
L now reaps child processes, thus avoiding |
|
2078
|
|
|
|
|
|
|
running out of processes when used a lot. |
|
2079
|
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
=back |
|
2081
|
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
=head2 2.12, 16-Oct-08 |
|
2083
|
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
Finally, a full (non-beta) 2.x release! |
|
2085
|
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=over 4 |
|
2087
|
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
2089
|
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
Jdb has been renamed Fsdb, the flatfile-streaming database. |
|
2091
|
|
|
|
|
|
|
This change affects all internal Perl APIs, |
|
2092
|
|
|
|
|
|
|
but no shell command-level APIs. |
|
2093
|
|
|
|
|
|
|
While Jdb served well for more than ten years, |
|
2094
|
|
|
|
|
|
|
it is easily confused with the Java debugger (even though Jdb was there first!). |
|
2095
|
|
|
|
|
|
|
It also is too generic to work well in web search engines. |
|
2096
|
|
|
|
|
|
|
Finally, Jdb stands for ``John's database'', and we're a bit beyond that. |
|
2097
|
|
|
|
|
|
|
(However, some call me the ``file-system guy'', so |
|
2098
|
|
|
|
|
|
|
one could argue it retains that meaning.) |
|
2099
|
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
If you just used the shell commands, this change should not affect you. |
|
2101
|
|
|
|
|
|
|
If you used the Perl-level libraries directly in your code, |
|
2102
|
|
|
|
|
|
|
you should be able to rename "Jdb" to "Fsdb" to move to 2.12. |
|
2103
|
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
The jdb-announce list has not yet been renamed, but it will be shortly. |
|
2105
|
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
With this release I've accomplished everything I wanted to |
|
2107
|
|
|
|
|
|
|
in fsdb-2.x. I therefore expect to return to boring, bugfix releases. |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
=back |
|
2110
|
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
=head2 2.13, 30-Oct-08 |
|
2112
|
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
=over 4 |
|
2114
|
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
=item BUG FIX |
|
2116
|
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
L now treats non-numeric data as zero by default. |
|
2118
|
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
=item BUG FIX |
|
2120
|
|
|
|
|
|
|
|
|
2121
|
|
|
|
|
|
|
Fixed a perl-5.10ism in L that |
|
2122
|
|
|
|
|
|
|
breaks that program under 5.8. |
|
2123
|
|
|
|
|
|
|
Thanks to Martin Lukac for reporting the bug. |
|
2124
|
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
=back |
|
2126
|
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
=head2 2.14, 26-Nov-08 |
|
2128
|
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
=over 4 |
|
2130
|
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
=item BUG FIX |
|
2132
|
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
Improved documentation for L's C<-f> option. |
|
2134
|
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2136
|
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
L now computes a moving standard deviation in addition |
|
2138
|
|
|
|
|
|
|
to a moving mean. |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
=back |
|
2141
|
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
|
|
2143
|
|
|
|
|
|
|
=head2 2.15, 13-Apr-09 |
|
2144
|
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
=over 4 |
|
2146
|
|
|
|
|
|
|
|
|
2147
|
|
|
|
|
|
|
=item BUG FIX |
|
2148
|
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
Fix a F bug reported by Shalindra Fernando. |
|
2150
|
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=back |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
=head2 2.16, 14-Apr-09 |
|
2155
|
|
|
|
|
|
|
|
|
2156
|
|
|
|
|
|
|
=over 4 |
|
2157
|
|
|
|
|
|
|
|
|
2158
|
|
|
|
|
|
|
=item BUG FIX |
|
2159
|
|
|
|
|
|
|
|
|
2160
|
|
|
|
|
|
|
Another minor release bug: on some systems F loses |
|
2161
|
|
|
|
|
|
|
executable permissions. Again reported by Shalindra Fernando. |
|
2162
|
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
=back |
|
2164
|
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
=head2 2.17, 25-Jun-09 |
|
2166
|
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
=over 4 |
|
2168
|
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
=item TYPO FIXES |
|
2170
|
|
|
|
|
|
|
|
|
2171
|
|
|
|
|
|
|
Typo in the F manual fixed. |
|
2172
|
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2174
|
|
|
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
There is no longer a comment line to label columns |
|
2176
|
|
|
|
|
|
|
in F, instead the header line is tweaked to |
|
2177
|
|
|
|
|
|
|
line up. This change restores the Jdb-1.x behavior, and |
|
2178
|
|
|
|
|
|
|
means that repeated runs of dbcolneaten no longer add comment lines |
|
2179
|
|
|
|
|
|
|
each time. |
|
2180
|
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
=item BUG FIX |
|
2182
|
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
It turns out F was not correctly handling trailing spaces |
|
2184
|
|
|
|
|
|
|
when given the C<-E> option to suppress them. This regression is now |
|
2185
|
|
|
|
|
|
|
fixed. |
|
2186
|
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
=item EXTENSION |
|
2188
|
|
|
|
|
|
|
|
|
2189
|
|
|
|
|
|
|
L can now handle direct references to the last row |
|
2190
|
|
|
|
|
|
|
via F<$lfref>, a dubious but now documented feature. |
|
2191
|
|
|
|
|
|
|
|
|
2192
|
|
|
|
|
|
|
=item BUG FIXES |
|
2193
|
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
Separators set with C<-C> in F and F |
|
2195
|
|
|
|
|
|
|
were not properly |
|
2196
|
|
|
|
|
|
|
setting the heading, and null fields were not recognized. |
|
2197
|
|
|
|
|
|
|
The first bug was reported by Martin Lukac. |
|
2198
|
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
=back |
|
2200
|
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
=head2 2.18, 1-Jul-09 A minor release |
|
2202
|
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
=over 4 |
|
2204
|
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2206
|
|
|
|
|
|
|
|
|
2207
|
|
|
|
|
|
|
Documentation for F has been improved. |
|
2208
|
|
|
|
|
|
|
|
|
2209
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2210
|
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
The package should now be PGP-signed. |
|
2212
|
|
|
|
|
|
|
|
|
2213
|
|
|
|
|
|
|
=back |
|
2214
|
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
=head2 2.19, 10-Jul-09 |
|
2217
|
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
=over 4 |
|
2219
|
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
=item BUG FIX |
|
2221
|
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
Internal improvements to debugging output and robustness of |
|
2223
|
|
|
|
|
|
|
F and F. |
|
2224
|
|
|
|
|
|
|
F re-enabled. |
|
2225
|
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
=back |
|
2227
|
|
|
|
|
|
|
|
|
2228
|
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
=head2 2.20, 30-Nov-09 |
|
2230
|
|
|
|
|
|
|
(A collection of minor bugfixes, plus a build against Fedora 12.) |
|
2231
|
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
=over 4 |
|
2233
|
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
=item BUG FIX |
|
2235
|
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
Logging for |
|
2237
|
|
|
|
|
|
|
F |
|
2238
|
|
|
|
|
|
|
with code refs is now stable |
|
2239
|
|
|
|
|
|
|
(it no longer includes a hex pointer to the code reference). |
|
2240
|
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
=item BUG FIX |
|
2242
|
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
Better handling of mixed blank lines in F |
|
2244
|
|
|
|
|
|
|
(see test case F). |
|
2245
|
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
=item BUG FIX |
|
2247
|
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
F now handles multi-line input better, |
|
2249
|
|
|
|
|
|
|
and handles tables with COLSPAN. |
|
2250
|
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
=item BUG FIX |
|
2252
|
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
F now cleans up threads in an C |
|
2254
|
|
|
|
|
|
|
to prevent "cannot detach a joined thread" errors that popped |
|
2255
|
|
|
|
|
|
|
up in perl-5.10. Hopefully this prevents a race condition |
|
2256
|
|
|
|
|
|
|
that causes the test suites to hang about 20% of the time |
|
2257
|
|
|
|
|
|
|
(in F). |
|
2258
|
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2260
|
|
|
|
|
|
|
|
|
2261
|
|
|
|
|
|
|
F now detects and correctly fails |
|
2262
|
|
|
|
|
|
|
when the input and reducer have incompatible |
|
2263
|
|
|
|
|
|
|
field separators. |
|
2264
|
|
|
|
|
|
|
|
|
2265
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2266
|
|
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
F, F, F, F, |
|
2268
|
|
|
|
|
|
|
and F |
|
2269
|
|
|
|
|
|
|
now all take an C<-F> option to let one specify the output field separator |
|
2270
|
|
|
|
|
|
|
(so they work better with F). |
|
2271
|
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
=item BUG FIX |
|
2273
|
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
An omitted C<-k> from the manual page of F |
|
2275
|
|
|
|
|
|
|
is now there. Bug reported by Unkyu Park. |
|
2276
|
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
=back |
|
2278
|
|
|
|
|
|
|
|
|
2279
|
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
=head2 2.21, 17-Apr-10 |
|
2281
|
|
|
|
|
|
|
bug fix release |
|
2282
|
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
=over 4 |
|
2284
|
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
=item BUG FIX |
|
2286
|
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
F now no longer fails with -outputheader => never |
|
2288
|
|
|
|
|
|
|
(an obscure bug). |
|
2289
|
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2291
|
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
F (in the warnings section) |
|
2293
|
|
|
|
|
|
|
and F now more carefully document how they |
|
2294
|
|
|
|
|
|
|
handle (and do not handle) numerical precision problems, |
|
2295
|
|
|
|
|
|
|
and other general limits. Thanks to Yuri Pradkin for prompting |
|
2296
|
|
|
|
|
|
|
this documentation. |
|
2297
|
|
|
|
|
|
|
|
|
2298
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2299
|
|
|
|
|
|
|
|
|
2300
|
|
|
|
|
|
|
C |
|
2301
|
|
|
|
|
|
|
is now restored from C. |
|
2302
|
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2304
|
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
Documentation for multiple styles of input approaches |
|
2306
|
|
|
|
|
|
|
(including performance description) added to L. |
|
2307
|
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
=back |
|
2309
|
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
=head2 2.22, 2010-10-31 |
|
2311
|
|
|
|
|
|
|
One new tool F and several bug fixes for Perl 5.10. |
|
2312
|
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
=over 4 |
|
2314
|
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
=item BUG FIX |
|
2316
|
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
F now correctly handles n-way merges. |
|
2318
|
|
|
|
|
|
|
Bug reported by Yuri Pradkin. |
|
2319
|
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
2321
|
|
|
|
|
|
|
|
|
2322
|
|
|
|
|
|
|
F now defaults to I<not> padding the last column. |
|
2323
|
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
=item ADDITION |
|
2325
|
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
F now takes B<-N NewColumn> to give the new |
|
2327
|
|
|
|
|
|
|
column a name other than "count". Feature requested by Mike Rouch |
|
2328
|
|
|
|
|
|
|
in January 2005. |
|
2329
|
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
=item ADDITION |
|
2331
|
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
New program F copies the last value of a column |
|
2333
|
|
|
|
|
|
|
into a new column copylast_column of the next row. |
|
2334
|
|
|
|
|
|
|
New program requested by Fabio Silva; |
|
2335
|
|
|
|
|
|
|
useful for converting dbmultistats output into dbrvstatdiff input. |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
=item BUG FIX |
|
2338
|
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
Several tools (particularly F and F) would |
|
2340
|
|
|
|
|
|
|
report errors like "Unbalanced string table refcount: (1) for "STDOUT" |
|
2341
|
|
|
|
|
|
|
during global destruction" on exit, at least on certain versions |
|
2342
|
|
|
|
|
|
|
of Perl (for me on 5.10.1), but similar errors have been off-and-on |
|
2343
|
|
|
|
|
|
|
for several Perl releases. Although I think my code looked |
|
2344
|
|
|
|
|
|
|
OK, I worked around this problem with a different way of handling |
|
2345
|
|
|
|
|
|
|
standard IO redirection. |
|
2346
|
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
=back |
|
2348
|
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
=head2 2.23, 2011-03-10 |
|
2351
|
|
|
|
|
|
|
Several small portability bugfixes; improved F for large datasets |
|
2352
|
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
=over 4 |
|
2354
|
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2356
|
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
Documentation to F was changed to use "sd" to refer to |
|
2358
|
|
|
|
|
|
|
standard deviation, not "ss" (which might be confused with sum-of-squares). |
|
2359
|
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
=item BUG FIX |
|
2361
|
|
|
|
|
|
|
|
|
2362
|
|
|
|
|
|
|
This documentation about F was missing the F<-k> option |
|
2363
|
|
|
|
|
|
|
in some cases. |
|
2364
|
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
=item BUG FIX |
|
2366
|
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
F was failing on MacOS-10.6.3 for some tests with |
|
2368
|
|
|
|
|
|
|
the error |
|
2369
|
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl) |
|
2371
|
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
The problem seemed to be only in the error, not in operation. |
|
2373
|
|
|
|
|
|
|
On MacOS, the error is now suppressed. |
|
2374
|
|
|
|
|
|
|
Thanks to Alefiya Hussain for providing access to a Mac system |
|
2375
|
|
|
|
|
|
|
that allowed debugging of this problem. |
|
2376
|
|
|
|
|
|
|
|
|
2377
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2378
|
|
|
|
|
|
|
|
|
2379
|
|
|
|
|
|
|
The F command requires an external |
|
2380
|
|
|
|
|
|
|
Perl library (F). On computers that |
|
2381
|
|
|
|
|
|
|
lack this optional library, previously Fsdb would configure |
|
2382
|
|
|
|
|
|
|
with a warning and then test cases would fail. |
|
2383
|
|
|
|
|
|
|
Now those test cases are skipped with an additional warning. |
|
2384
|
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
=item BUG FIX |
|
2386
|
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
The test suite now supports alternative valid output, as a hack |
|
2388
|
|
|
|
|
|
|
to account for last-digit floating point differences. |
|
2389
|
|
|
|
|
|
|
(Not very satisfying :-( |
|
2390
|
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
=item BUG FIX |
|
2392
|
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
F output for confidence intervals on very large |
|
2394
|
|
|
|
|
|
|
datasets has changed. Previously it failed for more than 2^31-1 |
|
2395
|
|
|
|
|
|
|
records, and handling of T-Distributions with thousands of rows |
|
2396
|
|
|
|
|
|
|
was a bit dubious. Now datasets with more than 10000 are considered |
|
2397
|
|
|
|
|
|
|
infinitely large and hopefully correctly handled. |
|
2398
|
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
=back |
|
2400
|
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
=head2 2.24, 2011-04-15 |
|
2402
|
|
|
|
|
|
|
Improvements to fix an old bug in dbmapreduce with different field separators |
|
2403
|
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
=over 4 |
|
2405
|
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2407
|
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
The F command had a C<--correct> option to |
|
2409
|
|
|
|
|
|
|
work around incompatible field separators, |
|
2410
|
|
|
|
|
|
|
but it did nothing. Now it does the correct but sad, data-losing |
|
2411
|
|
|
|
|
|
|
thing. |
|
2412
|
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2414
|
|
|
|
|
|
|
|
|
2415
|
|
|
|
|
|
|
The F command |
|
2416
|
|
|
|
|
|
|
previously failed with an error message when invoked |
|
2417
|
|
|
|
|
|
|
on input with a non-default field separator. |
|
2418
|
|
|
|
|
|
|
The root cause was the underlying F |
|
2419
|
|
|
|
|
|
|
that did not handle the case of reducers that generated |
|
2420
|
|
|
|
|
|
|
output with a different field separator than the input. |
|
2421
|
|
|
|
|
|
|
We now detect and repair incompatible field separators. |
|
2422
|
|
|
|
|
|
|
This change corrects a problem originally documented and detected |
|
2423
|
|
|
|
|
|
|
in Fsdb-2.20. |
|
2424
|
|
|
|
|
|
|
Bug re-reported by Unkyu Park. |
|
2425
|
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
=back |
|
2427
|
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
=head2 2.25, 2011-08-07 |
|
2429
|
|
|
|
|
|
|
Two new tools, F and F, and a bugfix for two people. |
|
2430
|
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
=over 4 |
|
2432
|
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2434
|
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
F now supports a F<--utc> option, |
|
2436
|
|
|
|
|
|
|
which also fixes this test case for users outside of the Pacific |
|
2437
|
|
|
|
|
|
|
time zone. Bug reported by David Graff, and also by Peter Desnoyers |
|
2438
|
|
|
|
|
|
|
(within a week of each other :-) |
|
2439
|
|
|
|
|
|
|
|
|
2440
|
|
|
|
|
|
|
=item NEW |
|
2441
|
|
|
|
|
|
|
|
|
2442
|
|
|
|
|
|
|
F can convert simple, very regular XML files into Fsdb. |
|
2443
|
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
=item NEW |
|
2445
|
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
F "pivots" a file, converting multiple rows |
|
2447
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
|
2448
|
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
=back |
|
2450
|
|
|
|
|
|
|
|
|
2451
|
|
|
|
|
|
|
=head2 2.26, 2011-12-12 |
|
2452
|
|
|
|
|
|
|
Bug fixes, particularly for perl-5.14.2. |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
=over 4 |
|
2455
|
|
|
|
|
|
|
|
|
2456
|
|
|
|
|
|
|
=item BUG FIX |
|
2457
|
|
|
|
|
|
|
|
|
2458
|
|
|
|
|
|
|
Bugs fixed in L manual page. |
|
2459
|
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
=item BUG FIX |
|
2461
|
|
|
|
|
|
|
|
|
2462
|
|
|
|
|
|
|
Fixed problems where L was truncating floating point numbers |
|
2463
|
|
|
|
|
|
|
when sorting. This strange behavior happens as of perl-5.14.2 and |
|
2464
|
|
|
|
|
|
|
it I like a Perl bug. I've worked around it for the test suites, |
|
2465
|
|
|
|
|
|
|
but I'm a bit nervous. |
|
2466
|
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
=back |
|
2468
|
|
|
|
|
|
|
|
|
2469
|
|
|
|
|
|
|
=head2 2.27, 2012-11-15 |
|
2470
|
|
|
|
|
|
|
Accumulated bug fixes. |
|
2471
|
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
=over 4 |
|
2473
|
|
|
|
|
|
|
|
|
2474
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2475
|
|
|
|
|
|
|
|
|
2476
|
|
|
|
|
|
|
F now reports errors in CSV input with real diagnostics. |
|
2477
|
|
|
|
|
|
|
|
|
2478
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2479
|
|
|
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
F can now compute median, when given the C<-m> option. |
|
2481
|
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
=item BUG FIX |
|
2483
|
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
F non-numeric handling (the C<-a> option) now works properly. |
|
2485
|
|
|
|
|
|
|
|
|
2486
|
|
|
|
|
|
|
=item DOCUMENTATION |
|
2487
|
|
|
|
|
|
|
|
|
2488
|
|
|
|
|
|
|
The internal |
|
2489
|
|
|
|
|
|
|
F test framework |
|
2490
|
|
|
|
|
|
|
is now documented. |
|
2491
|
|
|
|
|
|
|
|
|
2492
|
|
|
|
|
|
|
=item BUG FIX |
|
2493
|
|
|
|
|
|
|
|
|
2494
|
|
|
|
|
|
|
F now correctly handles the case where there is no input |
|
2495
|
|
|
|
|
|
|
(previously it output a blank line, which is a malformed fsdb file). |
|
2496
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug. |
|
2497
|
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
=back |
|
2499
|
|
|
|
|
|
|
|
|
2500
|
|
|
|
|
|
|
=head2 2.28, 2012-11-15 |
|
2501
|
|
|
|
|
|
|
A quick release to fix most rpmlint errors. |
|
2502
|
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
=over 4 |
|
2504
|
|
|
|
|
|
|
|
|
2505
|
|
|
|
|
|
|
=item BUG FIX |
|
2506
|
|
|
|
|
|
|
|
|
2507
|
|
|
|
|
|
|
Fixed a number of minor release problems (wrong permissions, old FSF |
|
2508
|
|
|
|
|
|
|
address, etc.) found by rpmlint. |
|
2509
|
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
=back |
|
2511
|
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
=head2 2.29, 2012-11-20 |
|
2513
|
|
|
|
|
|
|
a quick release for CPAN testing |
|
2514
|
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
=over 4 |
|
2516
|
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2518
|
|
|
|
|
|
|
|
|
2519
|
|
|
|
|
|
|
Tweaked the RPM spec. |
|
2520
|
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2522
|
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
Modified F to fail gracefully on Perl installations |
|
2524
|
|
|
|
|
|
|
that lack threads. (Without this fix, I get massive failures |
|
2525
|
|
|
|
|
|
|
in the non-ithreads test system.) |
|
2526
|
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
=back |
|
2528
|
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
=head2 2.30, 2012-11-25 |
|
2530
|
|
|
|
|
|
|
improvements to perl portability |
|
2531
|
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
=over 4 |
|
2533
|
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
=item BUG FIX |
|
2535
|
|
|
|
|
|
|
|
|
2536
|
|
|
|
|
|
|
Removed Unicode character in documentation of F |
|
2537
|
|
|
|
|
|
|
so pod tests will pass. (Sigh, that should work :-( ) |
|
2538
|
|
|
|
|
|
|
|
|
2539
|
|
|
|
|
|
|
=item BUG FIX |
|
2540
|
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
Fixed test suite failures on 5 tests (F |
|
2542
|
|
|
|
|
|
|
was the first) due to L's addition of a period. |
|
2543
|
|
|
|
|
|
|
This problem was breaking Fsdb on perl-5.17. |
|
2544
|
|
|
|
|
|
|
Thanks to Michael McQuaid for helping diagnose this problem. |
|
2545
|
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2547
|
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
The test suite now prints out the names of tests it tries. |
|
2549
|
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
=back |
|
2551
|
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
=head2 2.31, 2012-11-28 |
|
2553
|
|
|
|
|
|
|
A release with actual improvements to dbfilepivot and dbrowuniq. |
|
2554
|
|
|
|
|
|
|
|
|
2555
|
|
|
|
|
|
|
=over 4 |
|
2556
|
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
=item BUG FIX |
|
2558
|
|
|
|
|
|
|
|
|
2559
|
|
|
|
|
|
|
Documentation fixes: typos in L, |
|
2560
|
|
|
|
|
|
|
bugs in L, |
|
2561
|
|
|
|
|
|
|
clarification for comment handling in L. |
|
2562
|
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2564
|
|
|
|
|
|
|
|
|
2565
|
|
|
|
|
|
|
Previously L assumed the input was grouped by keys |
|
2566
|
|
|
|
|
|
|
and didn't verify that pre-condition. |
|
2567
|
|
|
|
|
|
|
Now there is no pre-condition (it will sort the input by default), |
|
2568
|
|
|
|
|
|
|
and it checks if the invariant is violated. |
|
2569
|
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
=item BUG FIX |
|
2571
|
|
|
|
|
|
|
|
|
2572
|
|
|
|
|
|
|
Previously L failed if the input had comments (oops :-); |
|
2573
|
|
|
|
|
|
|
no longer. |
|
2574
|
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2576
|
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
Now L has the C<-L> option to preserve the last |
|
2578
|
|
|
|
|
|
|
unique row (instead of the first), a common idiom. |
|
2579
|
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
=back |
|
2581
|
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
=head2 2.32, 2012-12-21 |
|
2583
|
|
|
|
|
|
|
Test suites should now be more numerically robust. |
|
2584
|
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
=over 4 |
|
2586
|
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
=item NEW |
|
2588
|
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
New L does fsdb-aware file differencing. |
|
2590
|
|
|
|
|
|
|
It does not do smart intuition of add/removes like Unix diff(1), |
|
2591
|
|
|
|
|
|
|
but it does know about columns, and with C<-E>, it does |
|
2592
|
|
|
|
|
|
|
numeric-aware differences. |
|
2593
|
|
|
|
|
|
|
|
|
2594
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2595
|
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
Test suites that are numeric now use L to do numeric-aware |
|
2597
|
|
|
|
|
|
|
comparisons, so the test suite should now be robust to slightly different |
|
2598
|
|
|
|
|
|
|
computers and operating systems and compilers than what I use. |
|
2599
|
|
|
|
|
|
|
|
|
2600
|
|
|
|
|
|
|
=back |
|
2601
|
|
|
|
|
|
|
|
|
2602
|
|
|
|
|
|
|
=head2 2.33, 2012-12-23 |
|
2603
|
|
|
|
|
|
|
Minor fixes to some test cases. |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
=over 4 |
|
2606
|
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2608
|
|
|
|
|
|
|
|
|
2609
|
|
|
|
|
|
|
L and L |
|
2610
|
|
|
|
|
|
|
now support the C<-N> option to give the new column a |
|
2611
|
|
|
|
|
|
|
different name. (And test cases where this duplication mattered |
|
2612
|
|
|
|
|
|
|
have been fixed.) |
|
2613
|
|
|
|
|
|
|
|
|
2614
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2615
|
|
|
|
|
|
|
|
|
2616
|
|
|
|
|
|
|
L now shows the t-test breakpoint with a reasonable number of |
|
2617
|
|
|
|
|
|
|
floating point digits. |
|
2618
|
|
|
|
|
|
|
|
|
2619
|
|
|
|
|
|
|
=item BUG FIX |
|
2620
|
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
Fixed a numerical stability problem in the F test case. |
|
2622
|
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
=back |
|
2624
|
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
|
2626
|
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
=head2 2.34, 2013-02-10 |
|
2628
|
|
|
|
|
|
|
Parallelism in L. |
|
2629
|
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
=over 4 |
|
2631
|
|
|
|
|
|
|
|
|
2632
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2633
|
|
|
|
|
|
|
|
|
2634
|
|
|
|
|
|
|
Documentation for L now includes resource requirements. |
|
2635
|
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2637
|
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
Default memory usage for L is now about 256MB. |
|
2639
|
|
|
|
|
|
|
(The world keeps moving forward.) |
|
2640
|
|
|
|
|
|
|
|
|
2641
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2642
|
|
|
|
|
|
|
|
|
2643
|
|
|
|
|
|
|
L now does merging in parallel. |
|
2644
|
|
|
|
|
|
|
As a side-effect, L should be faster when |
|
2645
|
|
|
|
|
|
|
input overflows memory. The level of parallelism |
|
2646
|
|
|
|
|
|
|
can be limited with the C<--parallelism> option. |
|
2647
|
|
|
|
|
|
|
(There is more work to do here, but we're off to a start.) |
|
2648
|
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
=back |
|
2650
|
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
=head2 2.35, 2013-02-23 |
|
2652
|
|
|
|
|
|
|
Improvements to dbmerge parallelism |
|
2653
|
|
|
|
|
|
|
|
|
2654
|
|
|
|
|
|
|
=over 4 |
|
2655
|
|
|
|
|
|
|
|
|
2656
|
|
|
|
|
|
|
=item BUG FIX |
|
2657
|
|
|
|
|
|
|
|
|
2658
|
|
|
|
|
|
|
Fsdb temporary files are now created more securely (with File::Temp). |
|
2659
|
|
|
|
|
|
|
|
|
2660
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2661
|
|
|
|
|
|
|
|
|
2662
|
|
|
|
|
|
|
Programs that sort or merge on fields (L, L, L, |
|
2663
|
|
|
|
|
|
|
L) now report an error if no fields on which to join or merge |
|
2664
|
|
|
|
|
|
|
are given. |
|
2665
|
|
|
|
|
|
|
|
|
2666
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2667
|
|
|
|
|
|
|
|
|
2668
|
|
|
|
|
|
|
Parallelism in L should now be more consistent, |
|
2669
|
|
|
|
|
|
|
with less starting and stopping. |
|
2670
|
|
|
|
|
|
|
|
|
2671
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2672
|
|
|
|
|
|
|
In L, the C<--xargs> option lets one give input filenames on |
|
2673
|
|
|
|
|
|
|
standard input, rather than the command line. |
|
2674
|
|
|
|
|
|
|
This feature paves the way for faster dbsort for large inputs |
|
2675
|
|
|
|
|
|
|
(by pipelining sorting and merging), expected in the next release. |
|
2676
|
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
=back |
|
2678
|
|
|
|
|
|
|
|
|
2679
|
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
=head2 2.36, 2013-02-25 |
|
2681
|
|
|
|
|
|
|
dbsort pipelines with dbmerge |
|
2682
|
|
|
|
|
|
|
|
|
2683
|
|
|
|
|
|
|
=over 4 |
|
2684
|
|
|
|
|
|
|
|
|
2685
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2686
|
|
|
|
|
|
|
For large inputs, |
|
2687
|
|
|
|
|
|
|
L now pipelines sorting and merging, |
|
2688
|
|
|
|
|
|
|
allowing earlier processing. |
|
2689
|
|
|
|
|
|
|
|
|
2690
|
|
|
|
|
|
|
=item BUG FIX |
|
2691
|
|
|
|
|
|
|
Since 2.35, L delayed cleanup of intermediate files, |
|
2692
|
|
|
|
|
|
|
thereby requiring extra disk space. |
|
2693
|
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
=back |
|
2695
|
|
|
|
|
|
|
|
|
2696
|
|
|
|
|
|
|
=head2 2.37, 2013-02-26 |
|
2697
|
|
|
|
|
|
|
quick bugfix to support parallel sort and merge from recent releases |
|
2698
|
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
=over 4 |
|
2700
|
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
=item BUG FIX |
|
2702
|
|
|
|
|
|
|
Since 2.35, L delayed removal of input files given by |
|
2703
|
|
|
|
|
|
|
C<--xargs>. This problem is now fixed. |
|
2704
|
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
=back |
|
2706
|
|
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
|
|
2708
|
|
|
|
|
|
|
=head2 2.38, 2013-04-29 |
|
2709
|
|
|
|
|
|
|
minor bug fixes |
|
2710
|
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
=over 4 |
|
2712
|
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
=item CLARIFICATION |
|
2714
|
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
Configure now rejects Windows since tests seem to hang |
|
2716
|
|
|
|
|
|
|
on some versions of Windows. |
|
2717
|
|
|
|
|
|
|
(I would love help from a Windows developer to get this problem fixed, |
|
2718
|
|
|
|
|
|
|
but I cannot do it.) See F. |
|
2719
|
|
|
|
|
|
|
|
|
2720
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2721
|
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
All programs that use temporary files |
|
2723
|
|
|
|
|
|
|
(L, L, L, L) |
|
2724
|
|
|
|
|
|
|
now take the C<-T> option |
|
2725
|
|
|
|
|
|
|
and set the temporary directory consistently. |
|
2726
|
|
|
|
|
|
|
|
|
2727
|
|
|
|
|
|
|
In addition, error messages are better when the temporary directory |
|
2728
|
|
|
|
|
|
|
has problems. Problem reported by Liang Zhu. |
|
2729
|
|
|
|
|
|
|
|
|
2730
|
|
|
|
|
|
|
=item BUG FIX |
|
2731
|
|
|
|
|
|
|
|
|
2732
|
|
|
|
|
|
|
L was failing with external, map-reduce aware reducers |
|
2733
|
|
|
|
|
|
|
(when invoked with -M and an external program). |
|
2734
|
|
|
|
|
|
|
(Sigh, did this case ever work?) |
|
2735
|
|
|
|
|
|
|
This case should now work. |
|
2736
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2011). |
|
2737
|
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
=item BUG FIX |
|
2739
|
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
Fixed perl-5.10 problem with L. |
|
2741
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2013). |
|
2742
|
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
=back |
|
2744
|
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
=head2 2.39, date 2013-05-31 |
|
2746
|
|
|
|
|
|
|
quick release for the dbrowuniq extension |
|
2747
|
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
=over 4 |
|
2749
|
|
|
|
|
|
|
|
|
2750
|
|
|
|
|
|
|
=item BUG FIX |
|
2751
|
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
Actually in 2.38, the Fedora F<.spec> got cleaner dependencies. |
|
2753
|
|
|
|
|
|
|
Suggestion from Christopher Meng via L. |
|
2754
|
|
|
|
|
|
|
|
|
2755
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2756
|
|
|
|
|
|
|
|
|
2757
|
|
|
|
|
|
|
Fsdb files are now explicitly set into UTF-8 encoding, |
|
2758
|
|
|
|
|
|
|
unless one specifies C<-encoding> to C. |
|
2759
|
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2761
|
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
L now supports C<-I> for incremental counting. |
|
2763
|
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
=back |
|
2765
|
|
|
|
|
|
|
|
|
2766
|
|
|
|
|
|
|
=head2 2.40, 2013-07-13 |
|
2767
|
|
|
|
|
|
|
small bug fixes |
|
2768
|
|
|
|
|
|
|
|
|
2769
|
|
|
|
|
|
|
=over 4 |
|
2770
|
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
=item BUG FIX |
|
2772
|
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
L now has more respect for a user-given temporary directory; |
|
2774
|
|
|
|
|
|
|
it no longer is ignored for merging. |
|
2775
|
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
2777
|
|
|
|
|
|
|
|
|
2778
|
|
|
|
|
|
|
L now has options to output the first, last, and both first |
|
2779
|
|
|
|
|
|
|
and last rows of a run (C<-F>, C<-L>, and C<-B>). |
|
2780
|
|
|
|
|
|
|
|
|
2781
|
|
|
|
|
|
|
=item BUG FIX |
|
2782
|
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
L now correctly handles C<-N>. Sigh, it didn't work before. |
|
2784
|
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
=back |
|
2786
|
|
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
=head2 2.41, 2013-07-29 |
|
2788
|
|
|
|
|
|
|
small bug and packaging fixes |
|
2789
|
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
=over 4 |
|
2791
|
|
|
|
|
|
|
|
|
2792
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2793
|
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
Documentation to L improved |
|
2795
|
|
|
|
|
|
|
(inspired by questions from Qian Kun). |
|
2796
|
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
=item BUG FIX |
|
2798
|
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
L no longer duplicates |
|
2800
|
|
|
|
|
|
|
singleton unique lines when outputting both (with C<-B>). |
|
2801
|
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
=item BUG FIX |
|
2803
|
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
Add missing C dependency to F. |
|
2805
|
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2807
|
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
Tests now show the diff of the failing output |
|
2809
|
|
|
|
|
|
|
if run with C. |
|
2810
|
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2812
|
|
|
|
|
|
|
|
|
2813
|
|
|
|
|
|
|
L now includes documentation for how to output extra rows. |
|
2814
|
|
|
|
|
|
|
Suggestion from Yuri Pradkin. |
|
2815
|
|
|
|
|
|
|
|
|
2816
|
|
|
|
|
|
|
=item BUG FIX |
|
2817
|
|
|
|
|
|
|
|
|
2818
|
|
|
|
|
|
|
Several improvements to the Fedora package |
|
2819
|
|
|
|
|
|
|
from Michael Schwendt |
|
2820
|
|
|
|
|
|
|
via L, |
|
2821
|
|
|
|
|
|
|
and from the harsh master that is F. |
|
2822
|
|
|
|
|
|
|
(I am stymied at teaching it that "outliers" is spelled correctly. |
|
2823
|
|
|
|
|
|
|
Maybe I should send it Schneier's book. And an unresolvable |
|
2824
|
|
|
|
|
|
|
invalid-spec-name lurks in the SRPM.) |
|
2825
|
|
|
|
|
|
|
|
|
2826
|
|
|
|
|
|
|
=back |
|
2827
|
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
=head2 2.42, 2013-07-31 |
|
2829
|
|
|
|
|
|
|
A bug fix and packaging release. |
|
2830
|
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
=over 4 |
|
2832
|
|
|
|
|
|
|
|
|
2833
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2834
|
|
|
|
|
|
|
|
|
2835
|
|
|
|
|
|
|
Documentation to L improved |
|
2836
|
|
|
|
|
|
|
to better explain memory usage. |
|
2837
|
|
|
|
|
|
|
(Based on problem report by Lin Quan.) |
|
2838
|
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
=item BUG FIX |
|
2840
|
|
|
|
|
|
|
|
|
2841
|
|
|
|
|
|
|
The F<.spec> is now F |
|
2842
|
|
|
|
|
|
|
to satisfy F. |
|
2843
|
|
|
|
|
|
|
Thanks to Christopher Meng for a specific bug report. |
|
2844
|
|
|
|
|
|
|
|
|
2845
|
|
|
|
|
|
|
=item BUG FIX |
|
2846
|
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
Test F no longer has a column |
|
2848
|
|
|
|
|
|
|
that caused failures because of numerical instability. |
|
2849
|
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
=item BUG FIX |
|
2851
|
|
|
|
|
|
|
|
|
2852
|
|
|
|
|
|
|
Some tests now better handle bugs in old versions of perl (5.10, 5.12). |
|
2853
|
|
|
|
|
|
|
Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12, |
|
2854
|
|
|
|
|
|
|
but the fix should affect other platforms. |
|
2855
|
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
=back |
|
2857
|
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
=head2 2.43, 2013-08-27 |
|
2859
|
|
|
|
|
|
|
Adds in-file compression. |
|
2860
|
|
|
|
|
|
|
|
|
2861
|
|
|
|
|
|
|
=over 4 |
|
2862
|
|
|
|
|
|
|
|
|
2863
|
|
|
|
|
|
|
=item BUG FIX |
|
2864
|
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
Changed the sort on F to strings |
|
2866
|
|
|
|
|
|
|
(from numerics) so we're less susceptible to false test-failures |
|
2867
|
|
|
|
|
|
|
due to floating point IO differences. |
|
2868
|
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
=item EXPERIMENTAL ENHANCEMENT |
|
2870
|
|
|
|
|
|
|
|
|
2871
|
|
|
|
|
|
|
Yet more parallelism in L: |
|
2872
|
|
|
|
|
|
|
new "endgame-mode" builds a merge tree of processes at the end |
|
2873
|
|
|
|
|
|
|
of large merge tasks to get maximal parallelism. |
|
2874
|
|
|
|
|
|
|
Currently this feature is off by default |
|
2875
|
|
|
|
|
|
|
because it can hang for some inputs. |
|
2876
|
|
|
|
|
|
|
Enable this experimental feature with C<--endgame>. |
|
2877
|
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2879
|
|
|
|
|
|
|
|
|
2880
|
|
|
|
|
|
|
C now handles being given C objects |
|
2881
|
|
|
|
|
|
|
(as exercised by L). |
|
2882
|
|
|
|
|
|
|
|
|
2883
|
|
|
|
|
|
|
=item BUG FIX |
|
2884
|
|
|
|
|
|
|
|
|
2885
|
|
|
|
|
|
|
Handling of NamedTmpfiles now supports concurrency. |
|
2886
|
|
|
|
|
|
|
This fix will hopefully fix occasional |
|
2887
|
|
|
|
|
|
|
"Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93." |
|
2888
|
|
|
|
|
|
|
errors. |
|
2889
|
|
|
|
|
|
|
|
|
2890
|
|
|
|
|
|
|
=item BUG FIX |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
Fsdb now requires perl 5.10. |
|
2893
|
|
|
|
|
|
|
This is a bug fix because some test cases used to require it, |
|
2894
|
|
|
|
|
|
|
but this fact was not properly documented. |
|
2895
|
|
|
|
|
|
|
(Back-porting to 5.008 would require removing all C<//> operators.) |
|
2896
|
|
|
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2898
|
|
|
|
|
|
|
|
|
2899
|
|
|
|
|
|
|
Fsdb now handles automatic compression of file contents. |
|
2900
|
|
|
|
|
|
|
Enable compression with C |
|
2901
|
|
|
|
|
|
|
(or C or C). |
|
2902
|
|
|
|
|
|
|
All programs should operate on compressed files |
|
2903
|
|
|
|
|
|
|
and leave the output with the same level of compression. |
|
2904
|
|
|
|
|
|
|
C is recommended as fastest and most efficient. |
|
2905
|
|
|
|
|
|
|
C produces unrepeatable output (and so has no |
|
2906
|
|
|
|
|
|
|
output test), it seems to insist on adding a timestamp. |
|
2907
|
|
|
|
|
|
|
|
|
2908
|
|
|
|
|
|
|
=back |
|
2909
|
|
|
|
|
|
|
|
|
2910
|
|
|
|
|
|
|
=head2 2.44, 2013-10-02 |
|
2911
|
|
|
|
|
|
|
A major change--all threads are gone. |
|
2912
|
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
=over 4 |
|
2914
|
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2916
|
|
|
|
|
|
|
|
|
2917
|
|
|
|
|
|
|
Fsdb is now thread free and only uses processes for parallelism. |
|
2918
|
|
|
|
|
|
|
This change is a big change--the entire motivation for Fsdb-2 |
|
2919
|
|
|
|
|
|
|
was to exploit parallelism via threading. |
|
2920
|
|
|
|
|
|
|
Parallelism--good, but perl threading--bad for performance. |
|
2921
|
|
|
|
|
|
|
Horribly bad for performance. |
|
2922
|
|
|
|
|
|
|
About 20x worse than pipes on my box. |
|
2923
|
|
|
|
|
|
|
(See perl bug #119445 for the discussion.) |
|
2924
|
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
=item NEW |
|
2926
|
|
|
|
|
|
|
|
|
2927
|
|
|
|
|
|
|
C provides a thread-like abstraction over forking, |
|
2928
|
|
|
|
|
|
|
with some nice support for callbacks in the parent upon child termination. |
|
2929
|
|
|
|
|
|
|
|
|
2930
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2931
|
|
|
|
|
|
|
|
|
2932
|
|
|
|
|
|
|
Details about removing threads: |
|
2933
|
|
|
|
|
|
|
C is thread free, |
|
2934
|
|
|
|
|
|
|
and new tests to verify each of its parts. |
|
2935
|
|
|
|
|
|
|
The easy cases are C, |
|
2936
|
|
|
|
|
|
|
C, C, C, and |
|
2937
|
|
|
|
|
|
|
C, each of which uses it in simple ways (2013-09-09). |
|
2938
|
|
|
|
|
|
|
C is now thread free (2013-09-13), |
|
2939
|
|
|
|
|
|
|
but was a significant rewrite, |
|
2940
|
|
|
|
|
|
|
which brought C along. |
|
2941
|
|
|
|
|
|
|
C is partly thread free (2013-09-21), |
|
2942
|
|
|
|
|
|
|
again as a rewrite, |
|
2943
|
|
|
|
|
|
|
and it brings C along. |
|
2944
|
|
|
|
|
|
|
Full C support took much longer (2013-10-02). |
|
2945
|
|
|
|
|
|
|
|
|
2946
|
|
|
|
|
|
|
=item BUG FIX |
|
2947
|
|
|
|
|
|
|
|
|
2948
|
|
|
|
|
|
|
When running with user-only output (C<-n>), |
|
2949
|
|
|
|
|
|
|
L now resets the output vector C<$ofref> |
|
2950
|
|
|
|
|
|
|
after it has been output. |
|
2951
|
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
=item NEW |
|
2953
|
|
|
|
|
|
|
|
|
2954
|
|
|
|
|
|
|
L will create all columns at the head of each row |
|
2955
|
|
|
|
|
|
|
with the C<--first> option. |
|
2956
|
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
=item NEW |
|
2958
|
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
L will concatenate two files, |
|
2960
|
|
|
|
|
|
|
verifying that they have the same schema. |
|
2961
|
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
2963
|
|
|
|
|
|
|
|
|
2964
|
|
|
|
|
|
|
L now passes comments through, |
|
2965
|
|
|
|
|
|
|
rather than eating them as before. |
|
2966
|
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
Also, L now supports a C<--> option to prevent misinterpreting |
|
2968
|
|
|
|
|
|
|
sub-program parameters as for dbmapreduce. |
|
2969
|
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
2971
|
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
L no longer figures out if it needs to add the key |
|
2973
|
|
|
|
|
|
|
to the output. For multi-key-aware reducers, it never does |
|
2974
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
|
2975
|
|
|
|
|
|
|
it defaults to add the key and will now fail if the reducer adds the key |
|
2976
|
|
|
|
|
|
|
(with error "dbcolcreate: attempt to create pre-existing column..."). |
|
2977
|
|
|
|
|
|
|
In such cases, one must disable adding the key with the new |
|
2978
|
|
|
|
|
|
|
option C<--no-prepend-key>. |
|
2979
|
|
|
|
|
|
|
|
|
2980
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
2981
|
|
|
|
|
|
|
|
|
2982
|
|
|
|
|
|
|
L no longer copies the input field separator by default. |
|
2983
|
|
|
|
|
|
|
For multi-key-aware reducers, it never does |
|
2984
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
|
2985
|
|
|
|
|
|
|
it defaults to I copying the field separator, |
|
2986
|
|
|
|
|
|
|
but it will copy it (the old default) with the C<--copy-fs> option. |
|
2987
|
|
|
|
|
|
|
|
|
2988
|
|
|
|
|
|
|
=back |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
=head2 2.45, 2013-10-07 |
|
2991
|
|
|
|
|
|
|
cleanup from de-thread-ification |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
=over 4 |
|
2994
|
|
|
|
|
|
|
|
|
2995
|
|
|
|
|
|
|
=item BUG FIX |
|
2996
|
|
|
|
|
|
|
|
|
2997
|
|
|
|
|
|
|
Corrected a fast busy-wait in L. |
|
2998
|
|
|
|
|
|
|
|
|
2999
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3000
|
|
|
|
|
|
|
|
|
3001
|
|
|
|
|
|
|
Endgame mode enabled in L; it (and also large cases of L) |
|
3002
|
|
|
|
|
|
|
should now exploit greater parallelism. |
|
3003
|
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
=item BUG FIX |
|
3005
|
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
Test case with C (gone since 2.44) now removed. |
|
3007
|
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
=back |
|
3009
|
|
|
|
|
|
|
|
|
3010
|
|
|
|
|
|
|
=head2 2.46, 2013-10-08 |
|
3011
|
|
|
|
|
|
|
continuing cleanup of our no-threads version |
|
3012
|
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
=over 4 |
|
3014
|
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
=item BUG FIX |
|
3016
|
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
Fixed some packaging details. |
|
3018
|
|
|
|
|
|
|
(Really, threads are no longer required, |
|
3019
|
|
|
|
|
|
|
missing tests in the MANIFEST.) |
|
3020
|
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
=item IMPROVEMENT |
|
3022
|
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
L now better communicates with the merge process to avoid |
|
3024
|
|
|
|
|
|
|
bursty parallelism. |
|
3025
|
|
|
|
|
|
|
|
|
3026
|
|
|
|
|
|
|
L now can take C<-autoflush => 1> |
|
3027
|
|
|
|
|
|
|
for line-buffered IO. |
|
3028
|
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
=back |
|
3030
|
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
=head2 2.47, 2013-10-12 |
|
3032
|
|
|
|
|
|
|
test suite cleanup for non-threaded perls |
|
3033
|
|
|
|
|
|
|
|
|
3034
|
|
|
|
|
|
|
=over 4 |
|
3035
|
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
=item BUG FIX |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
Removed some stray "use threads" in some test cases. |
|
3039
|
|
|
|
|
|
|
We didn't need them, and these were breaking non-threaded perls. |
|
3040
|
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
=item BUG FIX |
|
3042
|
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
Better handling of Fred cleanup; |
|
3044
|
|
|
|
|
|
|
should fix intermittent L failures on BSD. |
|
3045
|
|
|
|
|
|
|
|
|
3046
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3047
|
|
|
|
|
|
|
|
|
3048
|
|
|
|
|
|
|
Improved test framework to show output when tests fail. |
|
3049
|
|
|
|
|
|
|
(This time, for real.) |
|
3050
|
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=back |
|
3052
|
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
=head2 2.48, 2014-01-03 |
|
3054
|
|
|
|
|
|
|
small bugfixes and improved release engineering |
|
3055
|
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
=over 4 |
|
3057
|
|
|
|
|
|
|
|
|
3058
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3059
|
|
|
|
|
|
|
|
|
3060
|
|
|
|
|
|
|
Test suites now skip tests for libraries that are missing. |
|
3061
|
|
|
|
|
|
|
(Patch for missing C contributed by Calvin Ardi.) |
|
3062
|
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
Removed references to Jdb in the package specification. |
|
3066
|
|
|
|
|
|
|
Since the name was changed in 2008, there's no longer a huge |
|
3067
|
|
|
|
|
|
|
need for backwards compatibility. |
|
3068
|
|
|
|
|
|
|
(Suggestion from Petr Šabata.) |
|
3069
|
|
|
|
|
|
|
|
|
3070
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3071
|
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
Test suites now invoke the perl using the path from C<$Config{perlpath}>. |
|
3073
|
|
|
|
|
|
|
Hopefully this helps testing in environments where there are multiple installed |
|
3074
|
|
|
|
|
|
|
perls and the default perl is not the same as the perl-under-test |
|
3075
|
|
|
|
|
|
|
(as happens in cpantesters.org). |
|
3076
|
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
=item BUG FIX |
|
3078
|
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
Added specific encoding to this manpage to account for |
|
3080
|
|
|
|
|
|
|
Unicode. Required to build correctly against perl-5.18. |
|
3081
|
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
=back |
|
3083
|
|
|
|
|
|
|
|
|
3084
|
|
|
|
|
|
|
=head2 2.49, 2014-01-04 |
|
3085
|
|
|
|
|
|
|
bugfix to unicode handling in Fsdb IO (plus minor packaging fixes) |
|
3086
|
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
=over 4 |
|
3088
|
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
=item BUG FIX |
|
3090
|
|
|
|
|
|
|
|
|
3091
|
|
|
|
|
|
|
Restored a line in the F<.spec> to chmod g-s. |
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
=item BUG FIX |
|
3094
|
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
Unicode decoding is now handled correctly for programs that read |
|
3096
|
|
|
|
|
|
|
from standard input. |
|
3097
|
|
|
|
|
|
|
(Also: New test scripts cover unicode input and output.) |
|
3098
|
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
=item BUG FIX |
|
3100
|
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
Fix to L documentation encoding line. |
|
3102
|
|
|
|
|
|
|
Addresses test failure in perl-5.16 and earlier. |
|
3103
|
|
|
|
|
|
|
(Who knew "encoding" had to be followed by a blank line.) |
|
3104
|
|
|
|
|
|
|
|
|
3105
|
|
|
|
|
|
|
=back |
|
3106
|
|
|
|
|
|
|
|
|
3107
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
|
3108
|
|
|
|
|
|
|
|
|
3109
|
|
|
|
|
|
|
=head2 2.50, 2014-05-27 |
|
3110
|
|
|
|
|
|
|
a quick release for spec tweaks |
|
3111
|
|
|
|
|
|
|
|
|
3112
|
|
|
|
|
|
|
=over 4 |
|
3113
|
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3115
|
|
|
|
|
|
|
|
|
3116
|
|
|
|
|
|
|
In L, the C<-N> (no output, even comments) option now |
|
3117
|
|
|
|
|
|
|
implies C<-n>, and it now suppresses the header and trailer. |
|
3118
|
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
=item BUG FIX |
|
3120
|
|
|
|
|
|
|
|
|
3121
|
|
|
|
|
|
|
A few more tweaks to the F from Petr Šabata. |
|
3122
|
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
=item BUG FIX |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
Fixed 3 uses of C |
|
3126
|
|
|
|
|
|
|
failures (due to warnings, not real failures) on some platforms. |
|
3127
|
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
=back |
|
3129
|
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
=head2 2.51, 2014-09-05 |
|
3131
|
|
|
|
|
|
|
Feature enhancements to L, L, L, and new L |
|
3132
|
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
=over 4 |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3136
|
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
L now has a C<--no-recreate-fatal> |
|
3138
|
|
|
|
|
|
|
that causes it to ignore creation of existing columns |
|
3139
|
|
|
|
|
|
|
(instead of failing). |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
L once again is robust to reducers |
|
3144
|
|
|
|
|
|
|
that output the key; |
|
3145
|
|
|
|
|
|
|
C<--no-prepend-key> is no longer mandatory. |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3148
|
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
L can now enumerate the output rows with C<-E>. |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
=item BUG FIX |
|
3152
|
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
L is more mathematically robust. |
|
3154
|
|
|
|
|
|
|
Previously for some inputs and some platforms, |
|
3155
|
|
|
|
|
|
|
floating point rounding could |
|
3156
|
|
|
|
|
|
|
sometimes cause square roots of negative numbers. |
|
3157
|
|
|
|
|
|
|
|
|
3158
|
|
|
|
|
|
|
=item NEW |
|
3159
|
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
L converts the output of the MySQL or MariaDB |
|
3161
|
|
|
|
|
|
|
select command into fsdb format. |
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
3164
|
|
|
|
|
|
|
|
|
3165
|
|
|
|
|
|
|
L now outputs the I row |
|
3166
|
|
|
|
|
|
|
when doing sloppy numeric comparisons, |
|
3167
|
|
|
|
|
|
|
to better support test suites. |
|
3168
|
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
=back |
|
3170
|
|
|
|
|
|
|
|
|
3171
|
|
|
|
|
|
|
=head2 2.52, 2014-11-03 |
|
3172
|
|
|
|
|
|
|
Fixing the test suite for line number changes. |
|
3173
|
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
=over 4 |
|
3175
|
|
|
|
|
|
|
|
|
3176
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3177
|
|
|
|
|
|
|
|
|
3178
|
|
|
|
|
|
|
Test suites changes to be robust to exact line numbers of failures, |
|
3179
|
|
|
|
|
|
|
since different Perl releases fail on different lines. |
|
3180
|
|
|
|
|
|
|
L |
|
3181
|
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
=back |
|
3183
|
|
|
|
|
|
|
|
|
3184
|
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=head2 2.53, 2014-11-26 |
|
3186
|
|
|
|
|
|
|
bug fixes and stability improvements to dbmapreduce |
|
3187
|
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
=over 4 |
|
3189
|
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3191
|
|
|
|
|
|
|
|
|
3192
|
|
|
|
|
|
|
The L now supports a C<--quiet> option. |
|
3193
|
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3195
|
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
Better documentation of L. |
|
3197
|
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
=item BUGFIX |
|
3199
|
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
Added groff-base and perl-podlators to the Fedora package spec. |
|
3201
|
|
|
|
|
|
|
Fixes L. |
|
3202
|
|
|
|
|
|
|
(Also in package 2.52-2.) |
|
3203
|
|
|
|
|
|
|
|
|
3204
|
|
|
|
|
|
|
=item BUGFIX |
|
3205
|
|
|
|
|
|
|
|
|
3206
|
|
|
|
|
|
|
An important stability improvement to L. |
|
3207
|
|
|
|
|
|
|
It, plus L, and L now support |
|
3208
|
|
|
|
|
|
|
controlled parallelism with the C<--parallelism=N> option. |
|
3209
|
|
|
|
|
|
|
They default to run with the number of available CPUs. |
|
3210
|
|
|
|
|
|
|
L also moderates its level of parallelism. |
|
3211
|
|
|
|
|
|
|
Previously it would create reducers as needed, |
|
3212
|
|
|
|
|
|
|
causing CPU thrashing if reducers ran much slower than data production. |
|
3213
|
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
=item BUGFIX |
|
3215
|
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
The combination of L with L now works |
|
3217
|
|
|
|
|
|
|
as it should. (The obscure bug was an interaction with L |
|
3218
|
|
|
|
|
|
|
with non-multi-key reducers that output their own key. L |
|
3219
|
|
|
|
|
|
|
has too many useful corner cases.) |
|
3220
|
|
|
|
|
|
|
|
|
3221
|
|
|
|
|
|
|
=back |
|
3222
|
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
=head2 2.54, 2014-11-28 |
|
3224
|
|
|
|
|
|
|
fix for the test suite to correct failing tests on not-my-platform |
|
3225
|
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
=over 4 |
|
3227
|
|
|
|
|
|
|
|
|
3228
|
|
|
|
|
|
|
=item BUGFIX |
|
3229
|
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
Sigh, the test suite now has a test suite. |
|
3231
|
|
|
|
|
|
|
Because, yes, I broke it, causing many incorrect failures |
|
3232
|
|
|
|
|
|
|
at cpantesters. |
|
3233
|
|
|
|
|
|
|
Now fixed. |
|
3234
|
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=back |
|
3236
|
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
=head2 2.55, 2015-01-05 |
|
3238
|
|
|
|
|
|
|
many spelling fixes and L tests are more robust to different numeric precision |
|
3239
|
|
|
|
|
|
|
|
|
3240
|
|
|
|
|
|
|
=over 4 |
|
3241
|
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3243
|
|
|
|
|
|
|
|
|
3244
|
|
|
|
|
|
|
L now can be extra quiet, as I continue to try to track down |
|
3245
|
|
|
|
|
|
|
a numeric difference on FreeBSD AMD boxes. |
|
3246
|
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3248
|
|
|
|
|
|
|
|
|
3249
|
|
|
|
|
|
|
L gave different test output |
|
3250
|
|
|
|
|
|
|
(just reflecting rounding error) |
|
3251
|
|
|
|
|
|
|
when stddev approaches zero. We now detect and handle this case. |
|
3252
|
|
|
|
|
|
|
See |
|
3253
|
|
|
|
|
|
|
and thanks to H. Merijn Brand for the bug report. |
|
3254
|
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
=item BUG FIX |
|
3256
|
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
Many, many spelling bugs found by |
|
3258
|
|
|
|
|
|
|
H. Merijn Brand; thanks for the bug report. |
|
3259
|
|
|
|
|
|
|
|
|
3260
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
|
3261
|
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
A number of programs had misspelled "separator" |
|
3263
|
|
|
|
|
|
|
in C<--fieldseparator> and C<--columnseparator> options as "seperator". |
|
3264
|
|
|
|
|
|
|
These are now correctly spelled. |
|
3265
|
|
|
|
|
|
|
|
|
3266
|
|
|
|
|
|
|
=back |
|
3267
|
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
=head2 2.56, 2015-02-03 |
|
3269
|
|
|
|
|
|
|
fix against Getopt::Long-2.43's stricter error checking |
|
3270
|
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
=over 4 |
|
3272
|
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
=item BUG FIX |
|
3274
|
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
Internal argument parsing uses Getopt::Long, but mixed pass-through and E<lt>E<gt>. |
|
3276
|
|
|
|
|
|
|
Bug reported by Petr Pisar at L. |
|
3277
|
|
|
|
|
|
|
|
|
3278
|
|
|
|
|
|
|
=item BUG FIX |
|
3279
|
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
Added missing BuildRequires for C. |
|
3281
|
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
=back |
|
3283
|
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
=head2 2.57, 2015-04-29 |
|
3285
|
|
|
|
|
|
|
Minor changes, with better performance from L. |
|
3286
|
|
|
|
|
|
|
|
|
3287
|
|
|
|
|
|
|
=over 4 |
|
3288
|
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
=item BUG FIX |
|
3290
|
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
|
3292
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
|
3293
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
|
3294
|
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3296
|
|
|
|
|
|
|
|
|
3297
|
|
|
|
|
|
|
L should be faster with lots of small keys. |
|
3298
|
|
|
|
|
|
|
L now supports C<-k> to get some of the functionality of |
|
3299
|
|
|
|
|
|
|
L (if data is pre-sorted and median/quartiles are not required). |
|
3300
|
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
|
3302
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
|
3303
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
=back |
|
3306
|
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
|
|
3308
|
|
|
|
|
|
|
=head2 2.58, 2015-04-30 |
|
3309
|
|
|
|
|
|
|
Bugfix in L |
|
3310
|
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
=over 4 |
|
3312
|
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
=item BUG FIX |
|
3314
|
|
|
|
|
|
|
|
|
3315
|
|
|
|
|
|
|
Fixed a case where L suffered mojibake in endgame mode. |
|
3316
|
|
|
|
|
|
|
This bug surfaced when L was applied to large files |
|
3317
|
|
|
|
|
|
|
(big enough to require merging) with unicode in them; |
|
3318
|
|
|
|
|
|
|
the symptom was something like: |
|
3319
|
|
|
|
|
|
|
Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111. |
|
3320
|
|
|
|
|
|
|
|
|
3321
|
|
|
|
|
|
|
=back |
|
3322
|
|
|
|
|
|
|
|
|
3323
|
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
=head2 2.59, 2016-09-01 |
|
3325
|
|
|
|
|
|
|
Collect a few small bug fixes and documentation improvements. |
|
3326
|
|
|
|
|
|
|
|
|
3327
|
|
|
|
|
|
|
=over 4 |
|
3328
|
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
=item BUG FIX |
|
3330
|
|
|
|
|
|
|
|
|
3331
|
|
|
|
|
|
|
More IO is explicitly marked UTF-8 to avoid Perl's tendency to |
|
3332
|
|
|
|
|
|
|
mojibake on otherwise valid unicode input. |
|
3333
|
|
|
|
|
|
|
This change helps L. |
|
3334
|
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3336
|
|
|
|
|
|
|
|
|
3337
|
|
|
|
|
|
|
L now cross-references L. |
|
3338
|
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3340
|
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
Documentation for L now clarifies that the default is baseline mode. |
|
3342
|
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
=item BUG FIX |
|
3344
|
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
L now propagates C<-T> into the sorting process (if it is required). |
|
3346
|
|
|
|
|
|
|
Thanks to Lan Wei for reporting this bug. |
|
3347
|
|
|
|
|
|
|
|
|
3348
|
|
|
|
|
|
|
=back |
|
3349
|
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
=head2 2.60, 2016-09-04 |
|
3352
|
|
|
|
|
|
|
Adds support for hash joins. |
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
=over 4 |
|
3355
|
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3357
|
|
|
|
|
|
|
|
|
3358
|
|
|
|
|
|
|
L now supports hash joins |
|
3359
|
|
|
|
|
|
|
with C<-t lefthash> and C<-t righthash>. |
|
3360
|
|
|
|
|
|
|
Hash joins cache a table in memory, but do not require |
|
3361
|
|
|
|
|
|
|
that the other table be sorted. |
|
3362
|
|
|
|
|
|
|
They are ideal when joining a large table against a small one. |
|
3363
|
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
=back |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
=head2 2.61, 2016-09-05 |
|
3367
|
|
|
|
|
|
|
Support left and right outer joins. |
|
3368
|
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
=over 4 |
|
3370
|
|
|
|
|
|
|
|
|
3371
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3372
|
|
|
|
|
|
|
|
|
3373
|
|
|
|
|
|
|
L now handles left and right outer joins |
|
3374
|
|
|
|
|
|
|
with C<-t left> and C<-t right>. |
|
3375
|
|
|
|
|
|
|
|
|
3376
|
|
|
|
|
|
|
=item ENHANCEMENT |
|
3377
|
|
|
|
|
|
|
|
|
3378
|
|
|
|
|
|
|
L hash joins are now selected |
|
3379
|
|
|
|
|
|
|
with C<-m lefthash> and C<-m righthash> |
|
3380
|
|
|
|
|
|
|
(not the shortlived C<-t righthash> option). |
|
3381
|
|
|
|
|
|
|
(Technically this change is incompatible with Fsdb-2.60, but |
|
3382
|
|
|
|
|
|
|
no one but me ever used that version.) |
|
3383
|
|
|
|
|
|
|
|
|
3384
|
|
|
|
|
|
|
=back |
|
3385
|
|
|
|
|
|
|
|
|
3386
|
|
|
|
|
|
|
=head1 AUTHOR |
|
3387
|
|
|
|
|
|
|
|
|
3388
|
|
|
|
|
|
|
John Heidemann, C |
|
3389
|
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
See L for the many people who have contributed |
|
3391
|
|
|
|
|
|
|
bug reports and fixes. |
|
3392
|
|
|
|
|
|
|
|
|
3393
|
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
3395
|
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
Fsdb is Copyright (C) 1991-2016 by John Heidemann . |
|
3397
|
|
|
|
|
|
|
|
|
3398
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
|
3399
|
|
|
|
|
|
|
it under the terms of version 2 of the GNU General Public License as |
|
3400
|
|
|
|
|
|
|
published by the Free Software Foundation. |
|
3401
|
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but |
|
3403
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
|
3404
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
3405
|
|
|
|
|
|
|
General Public License for more details. |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
3408
|
|
|
|
|
|
|
along with this program; if not, write to the Free Software |
|
3409
|
|
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|
3410
|
|
|
|
|
|
|
|
|
3411
|
|
|
|
|
|
|
A copy of the GNU General Public License can be found in the file |
|
3412
|
|
|
|
|
|
|
``COPYING''. |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
|
|
3415
|
|
|
|
|
|
|
|
|
3416
|
|
|
|
|
|
|
=head1 COMMENTS and BUG REPORTS |
|
3417
|
|
|
|
|
|
|
|
|
3418
|
|
|
|
|
|
|
Any comments about these programs should be sent to John Heidemann |
|
3419
|
|
|
|
|
|
|
C. |
|
3420
|
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
=cut |
|
3423
|
|
|
|
|
|
|
|
|
3424
|
|
|
|
|
|
|
1; # End of Fsdb |
|
3425
|
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
# LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg |
|
3427
|
|
|
|
|
|
|
# LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un |
|
3428
|
|
|
|
|
|
|
# LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw |
|
3429
|
|
|
|
|
|
|
# LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize |
|
3430
|
|
|
|
|
|
|
# LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct |
|
3431
|
|
|
|
|
|
|
# LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR |
|
3432
|
|
|
|
|
|
|
# LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss |
|
3433
|
|
|
|
|
|
|
# LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff |
|
3434
|
|
|
|
|
|
|
# LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb |
|
3435
|
|
|
|
|
|
|
# LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley |
|
3436
|
|
|
|
|
|
|
# LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl |
|
3437
|
|
|
|
|
|
|
# LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar |
|
3438
|
|
|
|
|
|
|
# LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed |
|
3439
|
|
|
|
|
|
|
# LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah |
|
3440
|
|
|
|
|
|
|
# LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt |
|
3441
|
|
|
|
|
|
|
# LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit |
|
3442
|
|
|
|
|
|
|
# LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh |
|
3443
|
|
|
|
|
|
|
# LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre |
|
3444
|
|
|
|
|
|
|
# LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr |
|
3445
|
|
|
|
|
|
|
# LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz |
|
3446
|
|
|
|
|
|
|
# LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO |
|
3447
|
|
|
|
|
|
|
# LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen |
|
3448
|
|
|
|
|
|
|
# LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw |
|
3449
|
|
|
|
|
|
|
# LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount |
|
3450
|
|
|
|
|
|
|
# LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho |
|
3451
|
|
|
|
|
|
|
# LocalWords: alfred david clark constantine debrorah Fsdb's colized listized |
|
3452
|
|
|
|
|
|
|
# LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa |
|
3453
|
|
|
|
|
|
|
# LocalWords: refcount lossage DaGronk dbcolscorellate ipchain |