| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
2
|
|
|
|
|
|
|
# Copyright (c) 2002-2003 Vivendi Universal Net USA |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# May be copied under the same terms as perl itself. |
|
5
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Database-like operations on tab-delimited files. |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# Given two files: |
|
11
|
|
|
|
|
|
|
# band_data.tab with fields band_id, band_name, and band_status |
|
12
|
|
|
|
|
|
|
# song_data.tab with fields song_id, band_id, song_title |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
# The following sequence is more or less equivalent to |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# SELECT song_id, band_data.band_id AS band_id, |
|
17
|
|
|
|
|
|
|
# song_title, band_name, |
|
18
|
|
|
|
|
|
|
# int(band_id/1000) AS band_dir |
|
19
|
|
|
|
|
|
|
# FROM song_data INNER JOIN band_data ON song_data.band_id=band_data.band_id |
|
20
|
|
|
|
|
|
|
# WHERE band_status = 'APPROVED' |
|
21
|
|
|
|
|
|
|
# ORDER BY band_name |
|
22
|
|
|
|
|
|
|
# INTO TABLE songband |
|
23
|
|
|
|
|
|
|
# |
|
24
|
|
|
|
|
|
|
# |
|
25
|
|
|
|
|
|
|
# $band_data = Text::TabTable->import_headered("band_data.tab") ; |
|
26
|
|
|
|
|
|
|
# $song_data = Text::TabTable->import_headered("song_data.tab") ; |
|
27
|
|
|
|
|
|
|
# $joined = $song_data->join($band_data, "band_id", "band_id", "INNER") ; |
|
28
|
|
|
|
|
|
|
# $selected = $joined->select( |
|
29
|
|
|
|
|
|
|
# [ |
|
30
|
|
|
|
|
|
|
# 'song_id', |
|
31
|
|
|
|
|
|
|
# ['band_data.band_id', 'band_id'], |
|
32
|
|
|
|
|
|
|
# 'song_title', |
|
33
|
|
|
|
|
|
|
# 'band_name', |
|
34
|
|
|
|
|
|
|
# [ sub { int($_[0] / 1000) }, "band_dir", ["band_id"]], |
|
35
|
|
|
|
|
|
|
# ], |
|
36
|
|
|
|
|
|
|
# |
|
37
|
|
|
|
|
|
|
# sub { $_[0]->band_status eq 'APPROVED' }, |
|
38
|
|
|
|
|
|
|
# ) ; |
|
39
|
|
|
|
|
|
|
# $out = $selected->order("band_name") ; |
|
40
|
|
|
|
|
|
|
# $out->export_headered("songband.tab") ; |
|
41
|
|
|
|
|
|
|
# |
|
42
|
|
|
|
|
|
|
############################################################################## |
|
43
|
|
|
|
|
|
|
# |
|
44
|
|
|
|
|
|
|
# You can speed up LEFT and INNER joins on primary keys if you create an index |
|
45
|
|
|
|
|
|
|
# for the primary key column on the *right-side* table using |
|
46
|
|
|
|
|
|
|
# |
|
47
|
|
|
|
|
|
|
# $righttable->build_primary_index("band_id") ; |
|
48
|
|
|
|
|
|
|
# $newtable = $lefttable->join($righttable, "band_id", "band_id", "LEFT") ; |
|
49
|
|
|
|
|
|
|
# |
|
50
|
|
|
|
|
|
|
# If both tables are already sorted by the primary key because order() was |
|
51
|
|
|
|
|
|
|
# previously used, this will be slower. |
|
52
|
|
|
|
|
|
|
# |
|
53
|
|
|
|
|
|
|
# The index will not be used for RIGHT joins. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
package Text::TabTable ; |
|
56
|
|
|
|
|
|
|
|
|
57
|
1
|
|
|
1
|
|
749
|
use strict ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
29
|
|
|
58
|
1
|
|
|
1
|
|
4
|
use Carp ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
162
|
|
|
59
|
1
|
|
|
1
|
|
1122
|
use Data::Dumper ; |
|
|
1
|
|
|
|
|
11085
|
|
|
|
1
|
|
|
|
|
68
|
|
|
60
|
1
|
|
|
1
|
|
10
|
use Fcntl qw(O_WRONLY O_EXCL O_CREAT) ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
62
|
|
|
61
|
|
|
|
|
|
|
|
|
62
|
1
|
|
|
1
|
|
5
|
use vars qw($SORT $JOIN $VERBOSE $TMPDIR $VERSION) ; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
13886
|
|
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
$VERSION = "1.02" ; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
$TMPDIR = "." ; |
|
67
|
|
|
|
|
|
|
$SORT = "/bin/sort" ; |
|
68
|
|
|
|
|
|
|
$JOIN = "/usr/bin/join" ; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$VERBOSE=$ENV{TABTABLE_VERBOSE} ; |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
#### |
|
73
|
|
|
|
|
|
|
# Constructor. Takes a tab delimited file with a field name |
|
74
|
|
|
|
|
|
|
# header line and returns a TabTable object. Parses the header line |
|
75
|
|
|
|
|
|
|
# and creates a temporary file without the header line. |
|
76
|
|
|
|
|
|
|
#### |
|
77
|
|
|
|
|
|
|
sub import_headered |
|
78
|
|
|
|
|
|
|
{ |
|
79
|
0
|
|
|
0
|
0
|
|
my ($package,$fname) = @_ ; |
|
80
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
81
|
|
|
|
|
|
|
|
|
82
|
0
|
0
|
|
|
|
|
carp "importing $fname" if $VERBOSE ; |
|
83
|
0
|
0
|
|
|
|
|
open(F, $fname) || return undef ; |
|
84
|
0
|
0
|
|
|
|
|
open(NEWF, ">$newf") || return undef ; |
|
85
|
0
|
|
|
|
|
|
my $header = ; |
|
86
|
0
|
|
|
|
|
|
my $buf ; |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# copy the unheadered version of the file to a new file. |
|
89
|
0
|
|
|
|
|
|
while( read(F, $buf, 2048) ) { |
|
90
|
0
|
|
|
|
|
|
print NEWF $buf ; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
0
|
|
|
|
|
|
close F ; |
|
93
|
0
|
|
|
|
|
|
close NEWF ; |
|
94
|
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
chomp $header ; |
|
96
|
0
|
|
|
|
|
|
my @fieldnames = split(/\t/, $header) ; |
|
97
|
0
|
|
|
|
|
|
my @fields = map { Text::TabTable::Field->new($_) } @fieldnames ; |
|
|
0
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
my $name = $fname ; |
|
100
|
0
|
|
|
|
|
|
$name =~ s/\..*$// ; # remove extensions |
|
101
|
0
|
|
|
|
|
|
$name =~ s@.*\/@@ ; # remove path |
|
102
|
|
|
|
|
|
|
|
|
103
|
0
|
|
|
|
|
|
my $self = { |
|
104
|
|
|
|
|
|
|
filename => $newf, |
|
105
|
|
|
|
|
|
|
fieldlist => Text::TabTable::FieldList->new(@fields), |
|
106
|
|
|
|
|
|
|
name => $name, |
|
107
|
|
|
|
|
|
|
} ; |
|
108
|
0
|
|
|
|
|
|
bless $self, $package ; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
#### |
|
112
|
|
|
|
|
|
|
# Alternate constructor. Takes a tab delimited file *without* a field name |
|
113
|
|
|
|
|
|
|
# header line, plus the field names, and returns a TabTable object. This |
|
114
|
|
|
|
|
|
|
# saves time because it doesn't require making a tempfile without the header. |
|
115
|
|
|
|
|
|
|
#### |
|
116
|
|
|
|
|
|
|
sub import_unheadered |
|
117
|
|
|
|
|
|
|
{ |
|
118
|
0
|
|
|
0
|
0
|
|
my ($package,$fname, @fieldnames) = @_ ; |
|
119
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
120
|
|
|
|
|
|
|
|
|
121
|
0
|
0
|
|
|
|
|
carp "importing $fname (unheadered)" if $VERBOSE ; |
|
122
|
0
|
0
|
0
|
|
|
|
return undef if !-f $fname || !-r $fname ; |
|
123
|
|
|
|
|
|
|
|
|
124
|
0
|
|
|
|
|
|
my @fields = map { Text::TabTable::Field->new($_) } @fieldnames ; |
|
|
0
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
|
my $name = $fname ; |
|
127
|
0
|
|
|
|
|
|
$name =~ s/\..*$// ; # remove extensions |
|
128
|
0
|
|
|
|
|
|
$name =~ s@.*\/@@ ; # remove path |
|
129
|
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $self = { |
|
131
|
|
|
|
|
|
|
filename => $fname, |
|
132
|
|
|
|
|
|
|
dontdelete => 1, # so we know it's not a tempfile. |
|
133
|
|
|
|
|
|
|
fieldlist => Text::TabTable::FieldList->new(@fields), |
|
134
|
|
|
|
|
|
|
name => $name, |
|
135
|
|
|
|
|
|
|
} ; |
|
136
|
0
|
|
|
|
|
|
bless $self, $package ; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
#### |
|
141
|
|
|
|
|
|
|
# Undoes the escaping done by MediaExtractor. |
|
142
|
|
|
|
|
|
|
#### |
|
143
|
|
|
|
|
|
|
sub unescape |
|
144
|
|
|
|
|
|
|
{ |
|
145
|
0
|
|
|
0
|
0
|
|
my ($str) = @_ ; |
|
146
|
|
|
|
|
|
|
|
|
147
|
0
|
|
|
|
|
|
my $x = $str ; |
|
148
|
0
|
|
|
|
|
|
$str =~ s/\\\\/\xff/g ; |
|
149
|
0
|
|
|
|
|
|
$str =~ s/\\n/\n/g ; |
|
150
|
0
|
|
|
|
|
|
$str =~ s/\\t/\t/g ; |
|
151
|
0
|
|
|
|
|
|
$str =~ s/\xff/\\/g ; |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
return $str ; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
#### |
|
157
|
|
|
|
|
|
|
# This is the same escaping done by MediaExtractor. |
|
158
|
|
|
|
|
|
|
#### |
|
159
|
|
|
|
|
|
|
sub escape |
|
160
|
|
|
|
|
|
|
{ |
|
161
|
0
|
|
|
0
|
0
|
|
my ($str) = @_ ; |
|
162
|
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
$str =~ s/\\/\\\\/g; |
|
164
|
0
|
|
|
|
|
|
$str =~ s/\t/\\t/g; |
|
165
|
0
|
|
|
|
|
|
$str =~ s/\n/\\n/g; |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
return $str ; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
#### |
|
172
|
|
|
|
|
|
|
# Writes out a table as a file with a header. |
|
173
|
|
|
|
|
|
|
#### |
|
174
|
|
|
|
|
|
|
sub export_headered |
|
175
|
|
|
|
|
|
|
{ |
|
176
|
0
|
|
|
0
|
0
|
|
my ($self, $filename) = @_ ; |
|
177
|
0
|
0
|
|
|
|
|
carp "exporting $self->{name} to $filename" if $VERBOSE ; |
|
178
|
|
|
|
|
|
|
|
|
179
|
0
|
0
|
|
|
|
|
open(F, ">$filename") || croak "$filename: $!\n" ; |
|
180
|
0
|
|
|
|
|
|
print F $self->{fieldlist}->as_string(), "\n" ; |
|
181
|
0
|
|
|
|
|
|
close F ; |
|
182
|
0
|
|
|
|
|
|
system "cat $self->{filename} >> $filename" ; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
#### |
|
186
|
|
|
|
|
|
|
# Writes out a table as a file without a header. |
|
187
|
|
|
|
|
|
|
#### |
|
188
|
|
|
|
|
|
|
sub export_unheadered |
|
189
|
|
|
|
|
|
|
{ |
|
190
|
0
|
|
|
0
|
0
|
|
my ($self, $filename) = @_ ; |
|
191
|
0
|
0
|
|
|
|
|
carp "exporting $self->{name} to $filename" if $VERBOSE ; |
|
192
|
0
|
|
|
|
|
|
system "cp $self->{filename} $filename" ; |
|
193
|
0
|
0
|
|
|
|
|
if ($?) { |
|
194
|
0
|
|
|
|
|
|
unlink $filename ; |
|
195
|
0
|
|
|
|
|
|
croak "can't export to $filename: $!" ; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
#### |
|
200
|
|
|
|
|
|
|
# Returns a new table that has only one of each value in the specified |
|
201
|
|
|
|
|
|
|
# column. |
|
202
|
|
|
|
|
|
|
#### |
|
203
|
|
|
|
|
|
|
sub uniq |
|
204
|
|
|
|
|
|
|
{ |
|
205
|
0
|
|
|
0
|
0
|
|
my ($table, $colname) = @_ ; |
|
206
|
|
|
|
|
|
|
|
|
207
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($colname) ; |
|
208
|
0
|
0
|
|
|
|
|
croak "no field $colname in table" if !$colname ; |
|
209
|
|
|
|
|
|
|
|
|
210
|
0
|
0
|
0
|
|
|
|
if (!$table->{sorted_column} || $table->{sorted_column} != $colnum) { |
|
211
|
0
|
|
|
|
|
|
my $name = $table->name() ; |
|
212
|
0
|
|
|
|
|
|
$table = $table->order($colname) ; |
|
213
|
0
|
|
|
|
|
|
$table->name($name) ; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
|
|
216
|
0
|
0
|
|
|
|
|
carp "uniquing $table->{name} by $colname" if $VERBOSE ; |
|
217
|
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
219
|
|
|
|
|
|
|
|
|
220
|
0
|
0
|
|
|
|
|
open(OLDF, "<$table->{filename}") || die ; |
|
221
|
0
|
0
|
|
|
|
|
open(NEWF, ">$newf") || croak "$newf: $!\n" ; |
|
222
|
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
my $oldval = undef ; |
|
224
|
0
|
|
|
|
|
|
while () { |
|
225
|
0
|
|
|
|
|
|
chomp ; |
|
226
|
0
|
|
|
|
|
|
my @f = split(/\t/, $_, -1) ; |
|
227
|
0
|
0
|
0
|
|
|
|
if ($oldval ne $f[$colnum-1] || !defined $oldval) { |
|
228
|
0
|
|
|
|
|
|
print NEWF $_, "\n" ; |
|
229
|
0
|
|
|
|
|
|
$oldval = $f[$colnum-1] ; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
close(OLDF) ; |
|
234
|
0
|
|
|
|
|
|
close(NEWF) ; |
|
235
|
|
|
|
|
|
|
|
|
236
|
0
|
|
|
|
|
|
my $newtable = { |
|
237
|
|
|
|
|
|
|
filename => $newf, |
|
238
|
|
|
|
|
|
|
fieldlist => $table->{fieldlist}->deepcopy(), |
|
239
|
|
|
|
|
|
|
sorted_colnum => $colnum, |
|
240
|
|
|
|
|
|
|
name => $table->name(), |
|
241
|
|
|
|
|
|
|
} ; |
|
242
|
|
|
|
|
|
|
|
|
243
|
0
|
0
|
|
|
|
|
if (!defined wantarray ) { |
|
244
|
0
|
|
|
|
|
|
carp "Warning: Useless uniq in void context." ; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
|
bless $newtable, ref $table ; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
#### |
|
251
|
|
|
|
|
|
|
# Export to a cdb file. |
|
252
|
|
|
|
|
|
|
# There will be a special key "*FIELDNAMES*" whose value is a tab |
|
253
|
|
|
|
|
|
|
# separated list of the names of the fields. |
|
254
|
|
|
|
|
|
|
# The rest of the cdb file will be of the form key => tab-delimited-values. |
|
255
|
|
|
|
|
|
|
# |
|
256
|
|
|
|
|
|
|
# The key must be unique; however as a special case multiple blank keys |
|
257
|
|
|
|
|
|
|
# are allowed to be present; only the first one is used. This is a hack, |
|
258
|
|
|
|
|
|
|
# but is too good an optimization to pass up. |
|
259
|
|
|
|
|
|
|
#### |
|
260
|
|
|
|
|
|
|
sub export_cdb |
|
261
|
|
|
|
|
|
|
{ |
|
262
|
0
|
|
|
0
|
0
|
|
my ($self, $filename, $colname) = @_ ; |
|
263
|
0
|
|
|
|
|
|
require CDB_File ; |
|
264
|
0
|
0
|
|
|
|
|
carp "exporting $self->{name} to cdb $filename" if $VERBOSE ; |
|
265
|
|
|
|
|
|
|
|
|
266
|
0
|
0
|
|
|
|
|
my $t = CDB_File->new($filename, "$filename.new$$") or croak "$filename: $!" ; |
|
267
|
|
|
|
|
|
|
|
|
268
|
0
|
|
|
|
|
|
$t->insert("*FIELDNAMES*", $self->{fieldlist}->as_string()) ; |
|
269
|
|
|
|
|
|
|
|
|
270
|
0
|
0
|
|
|
|
|
open(F, "< $self->{filename}") || die "$self->{filename}: $!" ; |
|
271
|
0
|
|
|
|
|
|
my $colnum = $self->{fieldlist}->find_colnum($colname) ; |
|
272
|
0
|
|
|
|
|
|
$colnum-- ; |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Create a regex to skip over the columns before the key column and |
|
275
|
|
|
|
|
|
|
# collect the key column in $1. |
|
276
|
0
|
|
|
|
|
|
my $regex = '^' . ('[^\t]*\t' x $colnum) . '([^\t]*)' ; |
|
277
|
0
|
|
|
|
|
|
$regex = qr($regex) ; |
|
278
|
0
|
|
|
|
|
|
my $didblankkey = 0 ; |
|
279
|
0
|
|
|
|
|
|
while () { |
|
280
|
0
|
|
|
|
|
|
chomp ; |
|
281
|
0
|
0
|
|
|
|
|
/$regex/ || die ; |
|
282
|
0
|
|
|
|
|
|
my $key = $1 ; |
|
283
|
|
|
|
|
|
|
|
|
284
|
0
|
0
|
0
|
|
|
|
next if $key eq '' && $didblankkey++ ; |
|
285
|
|
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
|
$t->insert($key, $_) ; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
|
$t->finish() ; |
|
290
|
0
|
|
|
|
|
|
close(F) ; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
#### |
|
294
|
|
|
|
|
|
|
# Returns the named column of the table as an array. |
|
295
|
|
|
|
|
|
|
#### |
|
296
|
|
|
|
|
|
|
sub export_column |
|
297
|
|
|
|
|
|
|
{ |
|
298
|
0
|
|
|
0
|
0
|
|
my ($table, $colname) = @_ ; |
|
299
|
0
|
0
|
|
|
|
|
carp "exporting $table->{name} column $colname" if $VERBOSE ; |
|
300
|
|
|
|
|
|
|
|
|
301
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($colname) ; |
|
302
|
|
|
|
|
|
|
|
|
303
|
0
|
0
|
|
|
|
|
if (!defined $colnum) { |
|
304
|
0
|
|
|
|
|
|
croak "no column $colname" ; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
my @arr ; |
|
308
|
0
|
0
|
|
|
|
|
open(CUT, "cut -f$colnum $table->{filename}|") || die ; |
|
309
|
0
|
|
|
|
|
|
while(defined ($_=)) { |
|
310
|
0
|
|
|
|
|
|
chomp ; |
|
311
|
0
|
|
|
|
|
|
push @arr, $_ ; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
0
|
|
|
|
|
|
close CUT ; |
|
314
|
|
|
|
|
|
|
|
|
315
|
0
|
|
|
|
|
|
return @arr ; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
#### |
|
319
|
|
|
|
|
|
|
# Returns a new TabTable that is sorted by the requested column. Dies |
|
320
|
|
|
|
|
|
|
# if no such column. You can specify -descending=>1 or -numeric =>1 |
|
321
|
|
|
|
|
|
|
# after the fieldname. |
|
322
|
|
|
|
|
|
|
# |
|
323
|
|
|
|
|
|
|
# The sort is stable, so you can sort on multiple fields by doing |
|
324
|
|
|
|
|
|
|
# multiple sorts, with the most important one last. |
|
325
|
|
|
|
|
|
|
#### |
|
326
|
|
|
|
|
|
|
sub order |
|
327
|
|
|
|
|
|
|
{ |
|
328
|
0
|
|
|
0
|
0
|
|
my ($self, $fieldname, %args) = @_ ; |
|
329
|
0
|
0
|
|
|
|
|
carp "sorting $self->{name} by $fieldname" if $VERBOSE ; |
|
330
|
|
|
|
|
|
|
|
|
331
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# This is a flag that gets turned off if the sort is not alphabetic |
|
334
|
|
|
|
|
|
|
# and ascending. In that case, the sort order is not correct for the |
|
335
|
|
|
|
|
|
|
# join() method, and so join() would have to re-sort. |
|
336
|
0
|
|
|
|
|
|
my $joinable_sort = 1 ; |
|
337
|
|
|
|
|
|
|
|
|
338
|
0
|
|
|
|
|
|
my $colnum = $self->{fieldlist}->find_colnum($fieldname) ; |
|
339
|
0
|
0
|
|
|
|
|
if (!$colnum) { |
|
340
|
0
|
|
|
|
|
|
unlink $newf ; |
|
341
|
0
|
|
|
|
|
|
croak "No such field $fieldname" ; |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
|
my @sortargs = ("-s", |
|
345
|
|
|
|
|
|
|
"-T$TMPDIR", |
|
346
|
|
|
|
|
|
|
"-t\t", |
|
347
|
|
|
|
|
|
|
"-k$colnum,$colnum", |
|
348
|
|
|
|
|
|
|
"-o$newf", |
|
349
|
|
|
|
|
|
|
$self->{filename} |
|
350
|
|
|
|
|
|
|
) ; |
|
351
|
0
|
0
|
|
|
|
|
if ($args{-descending}) { |
|
352
|
0
|
|
|
|
|
|
unshift @sortargs, "-r" ; |
|
353
|
0
|
|
|
|
|
|
$joinable_sort = 0 ; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
0
|
0
|
|
|
|
|
if ($args{-numeric}) { |
|
356
|
0
|
|
|
|
|
|
unshift @sortargs, "-n" ; |
|
357
|
0
|
|
|
|
|
|
$joinable_sort = 0 ; |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
|
|
360
|
0
|
|
|
|
|
|
system $SORT, @sortargs ; |
|
361
|
|
|
|
|
|
|
|
|
362
|
0
|
0
|
|
|
|
|
if ($?) { |
|
363
|
0
|
|
|
|
|
|
unlink $newf ; |
|
364
|
0
|
|
|
|
|
|
croak "sort error" ; |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
0
|
0
|
|
|
|
|
my $newtable = { |
|
368
|
|
|
|
|
|
|
filename => $newf, |
|
369
|
|
|
|
|
|
|
fieldlist => $self->{fieldlist}->deepcopy(), |
|
370
|
|
|
|
|
|
|
sorted_colnum => $joinable_sort ? $colnum : undef, |
|
371
|
|
|
|
|
|
|
name => $self->name(), |
|
372
|
|
|
|
|
|
|
} ; |
|
373
|
|
|
|
|
|
|
|
|
374
|
0
|
0
|
|
|
|
|
if (!defined wantarray ) { |
|
375
|
0
|
|
|
|
|
|
carp "Warning: Useless order in void context." ; |
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
|
|
378
|
0
|
|
|
|
|
|
bless $newtable, ref $self ; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
#### |
|
382
|
|
|
|
|
|
|
# Returns a new table with two columns. The first column will contain the |
|
383
|
|
|
|
|
|
|
# unique values of the specified field, the second column will contain the |
|
384
|
|
|
|
|
|
|
# number of occurrences of that value. |
|
385
|
|
|
|
|
|
|
#### |
|
386
|
|
|
|
|
|
|
sub groupby_and_count |
|
387
|
|
|
|
|
|
|
{ |
|
388
|
0
|
|
|
0
|
0
|
|
my ($table, $fieldname, $newfieldname, %args) = @_ ; |
|
389
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($fieldname) ; |
|
390
|
0
|
0
|
|
|
|
|
if (!$colnum) { |
|
391
|
0
|
|
|
|
|
|
croak "No such field $fieldname" ; |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
# Create a temporary table that is sorted by the specified column. |
|
394
|
0
|
|
|
|
|
|
my $sortedtable = $table->order($fieldname, %args); |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# Taken from uniq(), One pass through the file counting the number |
|
397
|
|
|
|
|
|
|
# of times the specified column appears and creating a new file |
|
398
|
|
|
|
|
|
|
# with the specified column and count. |
|
399
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
400
|
0
|
0
|
|
|
|
|
open(OLDF, "<$sortedtable->{filename}") || die ; |
|
401
|
0
|
0
|
|
|
|
|
open(NEWF, ">$newf") || croak "$newf: $!\n" ; |
|
402
|
|
|
|
|
|
|
|
|
403
|
0
|
|
|
|
|
|
my $count = 0 ; |
|
404
|
0
|
|
|
|
|
|
my $oldval = undef ; |
|
405
|
0
|
|
|
|
|
|
while () { |
|
406
|
0
|
|
|
|
|
|
chomp ; |
|
407
|
0
|
|
|
|
|
|
my @f = split(/\t/, $_, -1) ; |
|
408
|
0
|
0
|
|
|
|
|
if (!defined $oldval) { |
|
|
|
0
|
|
|
|
|
|
|
409
|
0
|
|
|
|
|
|
$oldval = $f[$colnum-1] ; |
|
410
|
0
|
|
|
|
|
|
$count = 1 ; |
|
411
|
|
|
|
|
|
|
} elsif ($oldval ne $f[$colnum-1]) { |
|
412
|
0
|
|
|
|
|
|
print NEWF $oldval, "\t", $count, "\n" ; |
|
413
|
0
|
|
|
|
|
|
$oldval = $f[$colnum-1] ; |
|
414
|
0
|
|
|
|
|
|
$count = 1 ; |
|
415
|
|
|
|
|
|
|
} else { |
|
416
|
0
|
|
|
|
|
|
$count++ ; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
} |
|
419
|
0
|
0
|
|
|
|
|
if (defined $oldval) { |
|
420
|
0
|
|
|
|
|
|
print NEWF $oldval, "\t", $count, "\n" ; |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
|
|
423
|
0
|
|
|
|
|
|
close(OLDF) ; |
|
424
|
0
|
|
|
|
|
|
close(NEWF) ; |
|
425
|
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
|
my @newfieldnames = ($fieldname, $newfieldname) ; |
|
427
|
0
|
|
|
|
|
|
my @newfields = map { Text::TabTable::Field->new($_) } @newfieldnames ; |
|
|
0
|
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
|
my $newtable = { |
|
429
|
|
|
|
|
|
|
filename => $newf, |
|
430
|
|
|
|
|
|
|
fieldlist => Text::TabTable::FieldList->new(@newfields), |
|
431
|
|
|
|
|
|
|
sorted_colnum => 1, |
|
432
|
|
|
|
|
|
|
name => $table->name(), |
|
433
|
|
|
|
|
|
|
} ; |
|
434
|
|
|
|
|
|
|
|
|
435
|
0
|
|
|
|
|
|
bless $newtable, ref $table ; |
|
436
|
|
|
|
|
|
|
} |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
#### |
|
439
|
|
|
|
|
|
|
# Takes a list of pairs of oldname=>newname and changes the names of fields |
|
440
|
|
|
|
|
|
|
# of the table. This wipes out the old field names entirely. |
|
441
|
|
|
|
|
|
|
sub rename_fields |
|
442
|
|
|
|
|
|
|
{ |
|
443
|
0
|
|
|
0
|
0
|
|
my ($table, @renames) = @_ ; |
|
444
|
|
|
|
|
|
|
|
|
445
|
0
|
|
|
|
|
|
my $oldname ; |
|
446
|
|
|
|
|
|
|
my $newname ; |
|
447
|
0
|
|
|
|
|
|
my @fields = $table->{fieldlist}->fields() ; |
|
448
|
0
|
|
0
|
|
|
|
while ( ($oldname = shift(@renames)) && ($newname = shift(@renames)) ) { |
|
449
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($oldname) ; |
|
450
|
0
|
|
|
|
|
|
$fields[$colnum-1]->set_name($newname) ; |
|
451
|
|
|
|
|
|
|
} |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
#### |
|
455
|
|
|
|
|
|
|
# Gets or sets the name of the table. |
|
456
|
|
|
|
|
|
|
#### |
|
457
|
|
|
|
|
|
|
sub name |
|
458
|
|
|
|
|
|
|
{ |
|
459
|
0
|
|
|
0
|
0
|
|
my ($self, $name) = @_ ; |
|
460
|
0
|
0
|
|
|
|
|
if (defined $name) { |
|
461
|
0
|
|
|
|
|
|
$self->{name} = $name ; |
|
462
|
|
|
|
|
|
|
} |
|
463
|
0
|
|
|
|
|
|
return $self->{name} ; |
|
464
|
|
|
|
|
|
|
} |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#### |
|
467
|
|
|
|
|
|
|
# takes a table and exports it as a cdb file, creating a primary key |
|
468
|
|
|
|
|
|
|
# index. This can make joins go faster if this table is on the right side |
|
469
|
|
|
|
|
|
|
# of the join, since neither table has to be sorted, and building a cdb |
|
470
|
|
|
|
|
|
|
# is generally faster than sorting (~O(n) instead of O(nLogn). |
|
471
|
|
|
|
|
|
|
#### |
|
472
|
|
|
|
|
|
|
sub build_primary_index |
|
473
|
|
|
|
|
|
|
{ |
|
474
|
0
|
|
|
0
|
0
|
|
my ($self, $colname) = @_ ; |
|
475
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
476
|
|
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
|
$self->export_cdb($newf, $colname) ; |
|
478
|
0
|
|
|
|
|
|
my $colnum = $self->{fieldlist}->find_colnum($colname) ; |
|
479
|
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
$self->{cdb}{$colnum} = $newf ; |
|
481
|
|
|
|
|
|
|
} |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
#### |
|
484
|
|
|
|
|
|
|
# Returns a new table created by joining the two tables on a specified |
|
485
|
|
|
|
|
|
|
# column. the $side parameter can be specified as LEFT or RIGHT to |
|
486
|
|
|
|
|
|
|
# create LEFT/RIGHT joins, or can be INNER, OUTER, or undef. |
|
487
|
|
|
|
|
|
|
# $leftfield/$rightfield are the field names to be used in the two tables |
|
488
|
|
|
|
|
|
|
# for joining. |
|
489
|
|
|
|
|
|
|
# |
|
490
|
|
|
|
|
|
|
# If the right table has a primary index on the join column (created |
|
491
|
|
|
|
|
|
|
# by build_primary_index()), and it's either a left or inner join, |
|
492
|
|
|
|
|
|
|
# a simpler join algorithm will be used that does not require sorting. |
|
493
|
|
|
|
|
|
|
# |
|
494
|
|
|
|
|
|
|
# Both tables must have names. Tables get names either by setting them |
|
495
|
|
|
|
|
|
|
# with the name() method, or from the filename in the import_headered |
|
496
|
|
|
|
|
|
|
# method. |
|
497
|
|
|
|
|
|
|
#### |
|
498
|
|
|
|
|
|
|
sub join |
|
499
|
|
|
|
|
|
|
{ |
|
500
|
0
|
|
|
0
|
0
|
|
my ($lefttable, $righttable, $leftfield, $rightfield, $side) = @_ ; |
|
501
|
|
|
|
|
|
|
|
|
502
|
0
|
0
|
0
|
|
|
|
if (!$lefttable->name() || !$righttable->name()) { |
|
503
|
0
|
|
|
|
|
|
croak "both tables must have name()s" ; |
|
504
|
|
|
|
|
|
|
} |
|
505
|
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
my $leftcol = $lefttable->{fieldlist}->find_colnum($leftfield) ; |
|
507
|
0
|
0
|
|
|
|
|
croak "no field $leftfield in left table" if !$leftfield ; |
|
508
|
0
|
|
|
|
|
|
my $rightcol = $righttable->{fieldlist}->find_colnum($rightfield) ; |
|
509
|
0
|
0
|
|
|
|
|
croak "no field $rightfield in right table" if !$rightfield ; |
|
510
|
|
|
|
|
|
|
|
|
511
|
0
|
0
|
0
|
|
|
|
if ($righttable->{cdb}{$rightcol} && $side ne 'RIGHT' && $side ne 'OUTER') { |
|
|
|
|
0
|
|
|
|
|
|
512
|
0
|
0
|
|
|
|
|
if ($VERBOSE) { |
|
513
|
0
|
|
|
|
|
|
carp "index joining $lefttable->{name} with $righttable->{name}" ; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
0
|
|
|
|
|
|
return $lefttable->_join_using_index($righttable, |
|
516
|
|
|
|
|
|
|
$leftcol, $rightcol, $side) ; |
|
517
|
|
|
|
|
|
|
} |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# tables must be sorted by field. |
|
520
|
0
|
0
|
0
|
|
|
|
if (!$lefttable->{sorted_colnum} || $lefttable->{sorted_colnum} ne $leftcol) { |
|
521
|
0
|
|
|
|
|
|
$lefttable = $lefttable->order($leftfield) ; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
0
|
0
|
0
|
|
|
|
if (!$righttable->{sorted_colnum} || $righttable->{sorted_colnum} ne $rightcol) { |
|
524
|
0
|
|
|
|
|
|
$righttable = $righttable->order($rightfield) ; |
|
525
|
|
|
|
|
|
|
} |
|
526
|
|
|
|
|
|
|
|
|
527
|
0
|
0
|
|
|
|
|
carp "joining $lefttable->{name} with $righttable->{name}" if $VERBOSE ; |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# create a format string for join(1). |
|
531
|
|
|
|
|
|
|
# Looks like |
|
532
|
|
|
|
|
|
|
# 1.1,1.2,1.3,1.4, ... ,2.1,2.2,2.3, ... |
|
533
|
|
|
|
|
|
|
|
|
534
|
0
|
|
|
|
|
|
my $format = |
|
535
|
0
|
|
|
|
|
|
join(",", map { "1.$_" } 1..$lefttable->{fieldlist}->fieldcount()) |
|
536
|
|
|
|
|
|
|
. "," . |
|
537
|
0
|
|
|
|
|
|
join(",", map { "2.$_" } 1..$righttable->{fieldlist}->fieldcount()) ; |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
|
|
541
|
0
|
|
|
|
|
|
my $command = "$JOIN -1 $leftcol -2 $rightcol -o $format -t '\t' " ; |
|
542
|
|
|
|
|
|
|
|
|
543
|
0
|
0
|
0
|
|
|
|
if ($side eq 'LEFT') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
544
|
0
|
|
|
|
|
|
$command .= "-a 1 " |
|
545
|
|
|
|
|
|
|
} elsif ($side eq 'RIGHT') { |
|
546
|
0
|
|
|
|
|
|
$command .= "-a 2 " ; |
|
547
|
|
|
|
|
|
|
} elsif ($side eq 'OUTER') { |
|
548
|
0
|
|
|
|
|
|
$command .= "-a 1 -a 2 " ; |
|
549
|
|
|
|
|
|
|
} elsif (defined $side && $side ne 'INNER') { |
|
550
|
0
|
|
|
|
|
|
croak "invalid side argument" ; |
|
551
|
|
|
|
|
|
|
} |
|
552
|
|
|
|
|
|
|
|
|
553
|
0
|
|
|
|
|
|
$command .= $lefttable->{filename} . " " ; |
|
554
|
0
|
|
|
|
|
|
$command .= $righttable->{filename} . " " ; |
|
555
|
|
|
|
|
|
|
|
|
556
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
557
|
0
|
|
|
|
|
|
$command .= "> $newf" ; |
|
558
|
|
|
|
|
|
|
|
|
559
|
0
|
|
|
|
|
|
system $command ; |
|
560
|
|
|
|
|
|
|
|
|
561
|
0
|
0
|
|
|
|
|
croak "join failed" if $? ; |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# We've now joined the files, so we just have to create a fieldlist |
|
565
|
|
|
|
|
|
|
# for the new table. |
|
566
|
|
|
|
|
|
|
|
|
567
|
0
|
|
|
|
|
|
my $leftlistcopy = $lefttable->{fieldlist}->deepcopy ; |
|
568
|
0
|
|
|
|
|
|
foreach my $field ($leftlistcopy->fields) { |
|
569
|
0
|
|
|
|
|
|
$field->add_name( $lefttable->name . "." . $field->name() ) ; |
|
570
|
|
|
|
|
|
|
} |
|
571
|
0
|
|
|
|
|
|
my $rightlistcopy = $righttable->{fieldlist}->deepcopy ; |
|
572
|
0
|
|
|
|
|
|
foreach my $field ($rightlistcopy->fields) { |
|
573
|
0
|
|
|
|
|
|
$field->add_name( $righttable->name . "." . $field->name() ) ; |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
# we've now got copies of the two fieldlists, with new aliases for |
|
577
|
|
|
|
|
|
|
# the field names of the form tablename.fieldname. Construct |
|
578
|
|
|
|
|
|
|
# a final field list from these two lists. |
|
579
|
|
|
|
|
|
|
|
|
580
|
0
|
|
|
|
|
|
my @fields = ($leftlistcopy->fields, $rightlistcopy->fields) ; |
|
581
|
|
|
|
|
|
|
|
|
582
|
0
|
|
|
|
|
|
my $newtable = { |
|
583
|
|
|
|
|
|
|
name => $lefttable->{name}, |
|
584
|
|
|
|
|
|
|
filename => $newf, |
|
585
|
|
|
|
|
|
|
fieldlist => Text::TabTable::FieldList->new(@fields), |
|
586
|
|
|
|
|
|
|
sorted_colnum => $leftcol, |
|
587
|
|
|
|
|
|
|
} ; |
|
588
|
|
|
|
|
|
|
|
|
589
|
0
|
0
|
|
|
|
|
if (!defined wantarray ) { |
|
590
|
0
|
|
|
|
|
|
carp "Warning: Useless join in void context." ; |
|
591
|
|
|
|
|
|
|
} |
|
592
|
0
|
|
|
|
|
|
bless $newtable, ref $lefttable ; |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
#### |
|
596
|
|
|
|
|
|
|
# called by ->join() to perform a join when there is an appropriate cdb index |
|
597
|
|
|
|
|
|
|
# present on the right side table and it's not a right join. |
|
598
|
|
|
|
|
|
|
# |
|
599
|
|
|
|
|
|
|
# The column numbers passed in are 1-based. |
|
600
|
|
|
|
|
|
|
#### |
|
601
|
|
|
|
|
|
|
sub _join_using_index |
|
602
|
|
|
|
|
|
|
{ |
|
603
|
0
|
|
|
0
|
|
|
my ($lefttable, $righttable, $leftcol, $rightcol, $side) = @_ ; |
|
604
|
|
|
|
|
|
|
|
|
605
|
0
|
|
|
|
|
|
my $isleftjoin = $side eq 'LEFT' ; |
|
606
|
0
|
|
|
|
|
|
my $emptyright ; |
|
607
|
0
|
0
|
|
|
|
|
if ($isleftjoin) { |
|
608
|
0
|
|
|
|
|
|
$emptyright = "\t" x ($righttable->{fieldlist}->fieldcount() - 1) ; |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
|
|
611
|
0
|
0
|
|
|
|
|
open(LEFTF, $lefttable->{filename}) || croak ; |
|
612
|
0
|
|
|
|
|
|
require CDB_File ; |
|
613
|
|
|
|
|
|
|
|
|
614
|
0
|
|
|
|
|
|
my $newf = _make_tempfile() ; |
|
615
|
0
|
0
|
|
|
|
|
open(NEWF, ">$newf") || croak "$newf: $!" ; |
|
616
|
|
|
|
|
|
|
|
|
617
|
0
|
|
|
|
|
|
my %right ; |
|
618
|
|
|
|
|
|
|
|
|
619
|
0
|
0
|
|
|
|
|
tie (%right, 'CDB_File', $righttable->{cdb}{$rightcol}) || die ; |
|
620
|
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# create a regex that will extract the join field from a tab delimited |
|
622
|
|
|
|
|
|
|
# line. |
|
623
|
0
|
|
|
|
|
|
my $regex = '^' . ('[^\t]*\t' x ($leftcol-1)) . '([^\t]*)' ; |
|
624
|
0
|
|
|
|
|
|
$regex = qr($regex) ; |
|
625
|
|
|
|
|
|
|
|
|
626
|
0
|
|
|
|
|
|
my $leftfieldcount = $lefttable->{fieldlist}->fieldcount() ; |
|
627
|
0
|
|
|
|
|
|
my $rightfieldcount = $righttable->{fieldlist}->fieldcount() ; |
|
628
|
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
|
while () { |
|
630
|
0
|
|
|
|
|
|
chomp ; |
|
631
|
0
|
|
|
|
|
|
_add_missing_tabs(\$_, $leftfieldcount) ; |
|
632
|
0
|
0
|
|
|
|
|
/$regex/ || die "malformed temp file in line $_" ; |
|
633
|
0
|
|
|
|
|
|
my $key = $1 ; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
|
|
636
|
0
|
0
|
|
|
|
|
if (exists $right{$key}) { |
|
637
|
|
|
|
|
|
|
# found a match. Print a complete line. |
|
638
|
0
|
|
|
|
|
|
my $val = $right{$key} ; |
|
639
|
0
|
|
|
|
|
|
_add_missing_tabs(\$val, $rightfieldcount) ; |
|
640
|
0
|
|
|
|
|
|
print NEWF CORE::join("\t", $_, $val), "\n" ; |
|
641
|
|
|
|
|
|
|
} else { |
|
642
|
|
|
|
|
|
|
# didn't match. print a line if it's a left join, otherwise skip it. |
|
643
|
0
|
0
|
|
|
|
|
if ($isleftjoin) { |
|
644
|
0
|
|
|
|
|
|
print NEWF CORE::join("\t", $_, $emptyright), "\n" ; |
|
645
|
|
|
|
|
|
|
} |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
|
|
649
|
0
|
|
|
|
|
|
untie %right ; |
|
650
|
|
|
|
|
|
|
|
|
651
|
0
|
|
|
|
|
|
close LEFTF ; |
|
652
|
0
|
|
|
|
|
|
close NEWF ; |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# We've now joined the files, so we just have to create a fieldlist |
|
655
|
|
|
|
|
|
|
# for the new table. |
|
656
|
|
|
|
|
|
|
|
|
657
|
0
|
|
|
|
|
|
my $leftlistcopy = $lefttable->{fieldlist}->deepcopy ; |
|
658
|
0
|
|
|
|
|
|
foreach my $field ($leftlistcopy->fields) { |
|
659
|
0
|
|
|
|
|
|
$field->add_name( $lefttable->name . "." . $field->name() ) ; |
|
660
|
|
|
|
|
|
|
} |
|
661
|
0
|
|
|
|
|
|
my $rightlistcopy = $righttable->{fieldlist}->deepcopy ; |
|
662
|
0
|
|
|
|
|
|
foreach my $field ($rightlistcopy->fields) { |
|
663
|
0
|
|
|
|
|
|
$field->add_name( $righttable->name . "." . $field->name() ) ; |
|
664
|
|
|
|
|
|
|
} |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
# we've now got copies of the two fieldlists, with new aliases for |
|
667
|
|
|
|
|
|
|
# the field names of the form tablename.fieldname. Construct |
|
668
|
|
|
|
|
|
|
# a final field list from these two lists. |
|
669
|
|
|
|
|
|
|
|
|
670
|
0
|
|
|
|
|
|
my @fields = ($leftlistcopy->fields, $rightlistcopy->fields) ; |
|
671
|
|
|
|
|
|
|
|
|
672
|
0
|
|
|
|
|
|
my $newtable = { |
|
673
|
|
|
|
|
|
|
name => $lefttable->{name}, |
|
674
|
|
|
|
|
|
|
filename => $newf, |
|
675
|
|
|
|
|
|
|
fieldlist => Text::TabTable::FieldList->new(@fields), |
|
676
|
|
|
|
|
|
|
} ; |
|
677
|
|
|
|
|
|
|
|
|
678
|
0
|
0
|
|
|
|
|
if (!defined wantarray ) { |
|
679
|
0
|
|
|
|
|
|
carp "Warning: Useless join in void context." ; |
|
680
|
|
|
|
|
|
|
} |
|
681
|
0
|
|
|
|
|
|
bless $newtable, ref $lefttable ; |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
} |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# Given a ref to a string that's supposed to have n columns, make sure there are |
|
686
|
|
|
|
|
|
|
# n-1 tabs by adding more at the end. |
|
687
|
|
|
|
|
|
|
sub _add_missing_tabs |
|
688
|
|
|
|
|
|
|
{ |
|
689
|
0
|
|
|
0
|
|
|
my ($strref, $n) = @_ ; |
|
690
|
|
|
|
|
|
|
|
|
691
|
0
|
|
|
|
|
|
my $tabcount = ($$strref =~ tr/\t/\t/) ; |
|
692
|
|
|
|
|
|
|
|
|
693
|
0
|
0
|
|
|
|
|
if ($tabcount < $n-1) { |
|
694
|
0
|
|
|
|
|
|
$$strref .= "\t" x ( $n-1-$tabcount ) ; |
|
695
|
|
|
|
|
|
|
} |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
#### |
|
700
|
|
|
|
|
|
|
# processes a table and creates a new one with different stuff. |
|
701
|
|
|
|
|
|
|
# |
|
702
|
|
|
|
|
|
|
# parameters: |
|
703
|
|
|
|
|
|
|
# table is a Text::TabTable object. |
|
704
|
|
|
|
|
|
|
# |
|
705
|
|
|
|
|
|
|
# fieldspecs is a listref containing items of any of the following forms |
|
706
|
|
|
|
|
|
|
# fieldname ( a simple scalar ) |
|
707
|
|
|
|
|
|
|
# [fieldname, newfieldname] ( for "cd_table.id as cd_id") |
|
708
|
|
|
|
|
|
|
# [sub {...}, newfieldname, [list of fieldnames]] |
|
709
|
|
|
|
|
|
|
# (for calculated fields. The sub receives values for the |
|
710
|
|
|
|
|
|
|
# listed fields as parameters, and returns the new value) |
|
711
|
|
|
|
|
|
|
# fieldspecs can also be a simple "*", which returns all fields unchanged. |
|
712
|
|
|
|
|
|
|
# |
|
713
|
|
|
|
|
|
|
# wheresub is an optional subref. It is passed an object with getvalue, |
|
714
|
|
|
|
|
|
|
# setvalue, and autoloaded field-name-named methods to get and set values |
|
715
|
|
|
|
|
|
|
# of fields by name. It is expected to return a true value if the |
|
716
|
|
|
|
|
|
|
# row should be included in the output. |
|
717
|
|
|
|
|
|
|
#### |
|
718
|
|
|
|
|
|
|
sub select |
|
719
|
|
|
|
|
|
|
{ |
|
720
|
0
|
|
|
0
|
0
|
|
my ($table, $fieldspecs, $wheresub) = @_ ; |
|
721
|
|
|
|
|
|
|
|
|
722
|
0
|
0
|
|
|
|
|
carp "selecting from $table->{name}" if $VERBOSE ; |
|
723
|
|
|
|
|
|
|
|
|
724
|
0
|
|
|
|
|
|
my $newtable = { name => $table->{name} } ; |
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# this gets set to zero if there is a where clause or calculated columns. |
|
727
|
|
|
|
|
|
|
# Otherwise it just runs /bin/cut to pick the right columns. |
|
728
|
0
|
|
|
|
|
|
my $cut_ok = 1 ; |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# create a field list for the new table based on the selected fields. |
|
731
|
|
|
|
|
|
|
# also create an array saying how to calculate each output field. |
|
732
|
0
|
|
|
|
|
|
my @fieldrules ; |
|
733
|
0
|
0
|
0
|
|
|
|
if (!ref $fieldspecs && ($fieldspecs eq '*' || !defined $fieldspecs)) { |
|
|
|
|
0
|
|
|
|
|
|
734
|
|
|
|
|
|
|
# simple case. Just copy the fieldlist. |
|
735
|
0
|
|
|
|
|
|
undef $fieldspecs ; |
|
736
|
0
|
|
|
|
|
|
$newtable->{fieldlist} = $table->{fieldlist}->deepcopy() ; |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# we don't need any rules in this case; input = output |
|
739
|
|
|
|
|
|
|
} else { |
|
740
|
|
|
|
|
|
|
# make a new fieldlist, and rules. |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# @fields is the list of Field objects being built. |
|
743
|
0
|
|
|
|
|
|
my @fields ; |
|
744
|
|
|
|
|
|
|
|
|
745
|
0
|
|
|
|
|
|
foreach my $fieldspec (@$fieldspecs) { |
|
746
|
0
|
0
|
|
|
|
|
if (!ref $fieldspec) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# a simple scalar, representing a field name. |
|
748
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($fieldspec) ; |
|
749
|
0
|
0
|
|
|
|
|
if (!$colnum) { |
|
750
|
0
|
|
|
|
|
|
croak "no field $fieldspec in table" ; |
|
751
|
|
|
|
|
|
|
} |
|
752
|
|
|
|
|
|
|
# find_colnum returns 1-based column numbers. |
|
753
|
0
|
|
|
|
|
|
push @fieldrules, $colnum - 1 ; |
|
754
|
0
|
|
|
|
|
|
push @fields, Text::TabTable::Field->new($fieldspec) ; |
|
755
|
|
|
|
|
|
|
} elsif (@$fieldspec == 2) { |
|
756
|
|
|
|
|
|
|
# a field name to look up and what to call it in the output table. |
|
757
|
|
|
|
|
|
|
|
|
758
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($fieldspec->[0]) ; |
|
759
|
0
|
0
|
|
|
|
|
if (!$colnum) { |
|
760
|
0
|
|
|
|
|
|
croak "no field $fieldspec->[0] in table" ; |
|
761
|
|
|
|
|
|
|
} |
|
762
|
|
|
|
|
|
|
# find_colnum returns 1-based column numbers. |
|
763
|
0
|
|
|
|
|
|
push @fieldrules, $colnum - 1 ; |
|
764
|
0
|
|
|
|
|
|
push @fields, Text::TabTable::Field->new($fieldspec->[1]) ; |
|
765
|
|
|
|
|
|
|
} elsif (@$fieldspec == 3) { |
|
766
|
|
|
|
|
|
|
# A subref, a new column name, and a list of columns to pass to |
|
767
|
|
|
|
|
|
|
# the subref. |
|
768
|
0
|
|
|
|
|
|
my @paramcols ; |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
# since we're doing a calculated column, we have to use perl instead |
|
771
|
|
|
|
|
|
|
# of /bin/cut. |
|
772
|
0
|
|
|
|
|
|
$cut_ok = 0 ; |
|
773
|
|
|
|
|
|
|
|
|
774
|
0
|
|
|
|
|
|
foreach my $fieldname (@{$fieldspec->[2]}) { |
|
|
0
|
|
|
|
|
|
|
|
775
|
0
|
|
|
|
|
|
my $colnum = $table->{fieldlist}->find_colnum($fieldname) ; |
|
776
|
0
|
0
|
|
|
|
|
if (!$colnum) { |
|
777
|
0
|
|
|
|
|
|
croak "no field $fieldname in table" ; |
|
778
|
|
|
|
|
|
|
} |
|
779
|
0
|
|
|
|
|
|
push @paramcols, $colnum-1 ; |
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
# create a rule consiting of the subref followed by a listref of |
|
782
|
|
|
|
|
|
|
# what columns to get parameters from. |
|
783
|
0
|
|
|
|
|
|
push @fieldrules, [ $fieldspec->[0], \@paramcols ] ; |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
# fieldname is the new name passed in. |
|
786
|
0
|
|
|
|
|
|
push @fields, Text::TabTable::Field->new($fieldspec->[1]) ; |
|
787
|
|
|
|
|
|
|
} else { |
|
788
|
0
|
|
|
|
|
|
croak "bad fieldspec" ; |
|
789
|
|
|
|
|
|
|
} |
|
790
|
|
|
|
|
|
|
} |
|
791
|
|
|
|
|
|
|
|
|
792
|
0
|
|
|
|
|
|
$newtable->{fieldlist} = Text::TabTable::FieldList->new(@fields) ; |
|
793
|
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
|
|
795
|
0
|
|
|
|
|
|
$newtable->{filename} = _make_tempfile() ; |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
# build a hash saying which field is in which position in the input. |
|
798
|
0
|
|
|
|
|
|
my %fieldloc ; |
|
799
|
0
|
|
|
|
|
|
$table->_build_fieldloc(\%fieldloc) ; |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
# cut won't reorder columns, which angers me. So if the columns aren't |
|
802
|
|
|
|
|
|
|
# sorted, don't use cut. |
|
803
|
0
|
|
|
|
|
|
my $test_unsort = CORE::join(" ", @fieldrules) ; |
|
804
|
0
|
|
|
|
|
|
my $test_sort = CORE::join(" ", sort {$a <=> $b } @fieldrules) ; |
|
|
0
|
|
|
|
|
|
|
|
805
|
0
|
0
|
|
|
|
|
if ($test_unsort ne $test_sort) { |
|
806
|
0
|
|
|
|
|
|
$cut_ok = 0 ; |
|
807
|
|
|
|
|
|
|
} |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
# now $newtable->{fieldlist} contains the table names. We're done with that. |
|
810
|
|
|
|
|
|
|
# $newtable->{filename} contains the name of the file to be created. |
|
811
|
|
|
|
|
|
|
# @fieldrules tells us how to create each output column. |
|
812
|
|
|
|
|
|
|
# %fieldloc says which column number a field name can be found in. |
|
813
|
|
|
|
|
|
|
# so it's time to start processing. |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
|
|
816
|
0
|
0
|
0
|
|
|
|
if ($cut_ok && !$wheresub && $fieldspecs) { |
|
|
|
|
0
|
|
|
|
|
|
817
|
|
|
|
|
|
|
# there aren't any calculated columns, and there's no where clause, |
|
818
|
|
|
|
|
|
|
# so we can just use cut to pick the columns they wanted. |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
# @fieldrules has zero-based column numbers. Make one-based. |
|
821
|
0
|
|
|
|
|
|
my @cutfields = map { $_ + 1 } @fieldrules ; |
|
|
0
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
|
|
823
|
0
|
0
|
|
|
|
|
carp "...selecting using cut" if $VERBOSE ; |
|
824
|
0
|
|
|
|
|
|
system "cut -f" . CORE::join(',', @cutfields) . |
|
825
|
|
|
|
|
|
|
" $table->{filename} > $newtable->{filename}" ; |
|
826
|
0
|
0
|
|
|
|
|
if ($?) { |
|
827
|
0
|
|
|
|
|
|
unlink $newtable->{filename} ; |
|
828
|
0
|
|
|
|
|
|
croak "cut error in select" ; |
|
829
|
|
|
|
|
|
|
} |
|
830
|
|
|
|
|
|
|
} else { |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
# process the file using perl. |
|
833
|
|
|
|
|
|
|
|
|
834
|
0
|
0
|
|
|
|
|
open(INFILE, $table->{filename}) || croak "can't open table file" ; |
|
835
|
0
|
0
|
|
|
|
|
open(OUTFILE, ">$newtable->{filename}") || croak "can't open output table file" ; |
|
836
|
|
|
|
|
|
|
|
|
837
|
0
|
|
|
|
|
|
while() { |
|
838
|
0
|
|
|
|
|
|
chomp ; |
|
839
|
0
|
|
|
|
|
|
my @values = split(/\t/, $_, 999999) ; |
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
# run the where clause subroutine, if any, and skip if it says to. |
|
842
|
0
|
0
|
|
|
|
|
if ($wheresub) { |
|
843
|
0
|
|
|
|
|
|
my $rowdata = bless([\%fieldloc, \@values], 'Text::TabTable::DataRow') ; |
|
844
|
0
|
0
|
|
|
|
|
next if !&$wheresub($rowdata) ; |
|
845
|
|
|
|
|
|
|
} |
|
846
|
|
|
|
|
|
|
|
|
847
|
0
|
0
|
|
|
|
|
if (!$fieldspecs) { |
|
848
|
|
|
|
|
|
|
# select *. Just print them out. |
|
849
|
0
|
|
|
|
|
|
print OUTFILE CORE::join("\t", @values), "\n" ; |
|
850
|
|
|
|
|
|
|
} else { |
|
851
|
0
|
|
|
|
|
|
my @outvals ; |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
# use the @fieldrules to create @outvals from @values. |
|
854
|
0
|
|
|
|
|
|
foreach my $rule (@fieldrules) { |
|
855
|
0
|
0
|
|
|
|
|
if (!ref $rule) { |
|
856
|
0
|
|
|
|
|
|
push @outvals, $values[$rule] ; |
|
857
|
|
|
|
|
|
|
} else { |
|
858
|
|
|
|
|
|
|
# it's an arrayref containing a subref and a bunch of column |
|
859
|
|
|
|
|
|
|
# numbers. Call the subroutine with the values pointed to |
|
860
|
|
|
|
|
|
|
# by those column numbers and use the return value as the |
|
861
|
|
|
|
|
|
|
# output field value. |
|
862
|
0
|
|
|
|
|
|
my @params = map { $values[$_] } @{$rule->[1]} ; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
863
|
0
|
|
|
|
|
|
my $subref = $rule->[0] ; |
|
864
|
0
|
|
|
|
|
|
push @outvals, scalar(&$subref(@params)) ; |
|
865
|
|
|
|
|
|
|
} |
|
866
|
|
|
|
|
|
|
} |
|
867
|
|
|
|
|
|
|
|
|
868
|
0
|
|
|
|
|
|
print OUTFILE CORE::join("\t", @outvals), "\n" ; |
|
869
|
|
|
|
|
|
|
} |
|
870
|
|
|
|
|
|
|
} |
|
871
|
|
|
|
|
|
|
|
|
872
|
0
|
|
|
|
|
|
close OUTFILE ; |
|
873
|
0
|
|
|
|
|
|
close INFILE ; |
|
874
|
|
|
|
|
|
|
} |
|
875
|
|
|
|
|
|
|
|
|
876
|
0
|
0
|
|
|
|
|
if (!defined wantarray ) { |
|
877
|
0
|
|
|
|
|
|
carp "Warning: select used in void context." ; |
|
878
|
|
|
|
|
|
|
} |
|
879
|
|
|
|
|
|
|
|
|
880
|
0
|
|
|
|
|
|
return bless $newtable, ref $table ; |
|
881
|
|
|
|
|
|
|
} |
|
882
|
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
#### |
|
884
|
|
|
|
|
|
|
# Fills in a hash with a mapping from field name to column number. |
|
885
|
|
|
|
|
|
|
#### |
|
886
|
|
|
|
|
|
|
sub _build_fieldloc |
|
887
|
|
|
|
|
|
|
{ |
|
888
|
0
|
|
|
0
|
|
|
my ($table, $hr_fieldloc) = @_ ; |
|
889
|
0
|
|
|
|
|
|
my $pos = 0 ; |
|
890
|
0
|
|
|
|
|
|
foreach my $field ($table->{fieldlist}->fields()) { |
|
891
|
0
|
|
|
|
|
|
foreach my $fieldname ($field->names()) { |
|
892
|
0
|
0
|
|
|
|
|
$hr_fieldloc->{$fieldname} = $pos if !exists $hr_fieldloc->{$fieldname} ; |
|
893
|
|
|
|
|
|
|
} |
|
894
|
0
|
|
|
|
|
|
$pos++ ; |
|
895
|
|
|
|
|
|
|
} |
|
896
|
|
|
|
|
|
|
} |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
#### |
|
899
|
|
|
|
|
|
|
# Runs through the rows of a tab table, calling a subroutine for each |
|
900
|
|
|
|
|
|
|
# line. The subroutine has the same calling convention as the where |
|
901
|
|
|
|
|
|
|
# part of a select() call. |
|
902
|
|
|
|
|
|
|
# |
|
903
|
|
|
|
|
|
|
# This is like a select(undef,sub {}) but does not return a new table. |
|
904
|
|
|
|
|
|
|
#### |
|
905
|
|
|
|
|
|
|
sub iterate |
|
906
|
|
|
|
|
|
|
{ |
|
907
|
0
|
|
|
0
|
0
|
|
my ($table, $wheresub) = @_ ; |
|
908
|
0
|
0
|
|
|
|
|
carp "iterating over $table->{name}" if $VERBOSE ; |
|
909
|
|
|
|
|
|
|
|
|
910
|
0
|
0
|
|
|
|
|
open(INFILE, $table->{filename}) || croak "can't open table file" ; |
|
911
|
|
|
|
|
|
|
|
|
912
|
0
|
0
|
|
|
|
|
die if !$wheresub ; |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
# build a hash saying which field is in which position in the input. |
|
915
|
0
|
|
|
|
|
|
my %fieldloc ; |
|
916
|
0
|
|
|
|
|
|
$table->_build_fieldloc(\%fieldloc) ; |
|
917
|
|
|
|
|
|
|
|
|
918
|
0
|
|
|
|
|
|
while() { |
|
919
|
0
|
|
|
|
|
|
chomp ; |
|
920
|
0
|
|
|
|
|
|
my @values = split(/\t/, $_, 999999) ; |
|
921
|
|
|
|
|
|
|
|
|
922
|
0
|
|
|
|
|
|
my $rowdata = bless([\%fieldloc, \@values], 'Text::TabTable::DataRow') ; |
|
923
|
0
|
|
|
|
|
|
&$wheresub($rowdata) ; |
|
924
|
|
|
|
|
|
|
} |
|
925
|
|
|
|
|
|
|
|
|
926
|
0
|
|
|
|
|
|
close INFILE ; |
|
927
|
|
|
|
|
|
|
} |
|
928
|
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
#### |
|
930
|
|
|
|
|
|
|
# Returns an object with a next() method, which gives one row object each |
|
931
|
|
|
|
|
|
|
# time next() is called. |
|
932
|
|
|
|
|
|
|
# |
|
933
|
|
|
|
|
|
|
# If -unescape=>1, the tab/backslash/newline escaping will be removed. |
|
934
|
|
|
|
|
|
|
#### |
|
935
|
|
|
|
|
|
|
sub make_iterator |
|
936
|
|
|
|
|
|
|
{ |
|
937
|
0
|
|
|
0
|
0
|
|
my ($table, %args) = @_ ; |
|
938
|
0
|
0
|
|
|
|
|
carp "iterating over $table->{name}" if $VERBOSE ; |
|
939
|
|
|
|
|
|
|
|
|
940
|
0
|
0
|
|
|
|
|
if ($args{-unescape}) { |
|
941
|
0
|
|
|
|
|
|
return Text::TabTable::Iterator::Unescaping->new($table) ; |
|
942
|
|
|
|
|
|
|
} else { |
|
943
|
0
|
|
|
|
|
|
return Text::TabTable::Iterator->new($table) ; |
|
944
|
|
|
|
|
|
|
} |
|
945
|
|
|
|
|
|
|
} |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
sub DESTROY |
|
948
|
|
|
|
|
|
|
{ |
|
949
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
950
|
0
|
0
|
|
|
|
|
if (!$self->{dontdelete}) { |
|
951
|
0
|
|
|
|
|
|
unlink $self->{filename} ; |
|
952
|
|
|
|
|
|
|
} |
|
953
|
|
|
|
|
|
|
|
|
954
|
0
|
|
|
|
|
|
foreach my $cdbfile ( values %{$self->{cdb}} ) { |
|
|
0
|
|
|
|
|
|
|
|
955
|
0
|
|
|
|
|
|
unlink $cdbfile ; |
|
956
|
|
|
|
|
|
|
} |
|
957
|
|
|
|
|
|
|
} |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
#### |
|
960
|
|
|
|
|
|
|
# Creates a temporary file and returns its filename. |
|
961
|
|
|
|
|
|
|
#### |
|
962
|
1
|
|
|
1
|
|
15
|
use vars qw(@TEMPFILES) ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
388
|
|
|
963
|
|
|
|
|
|
|
sub _make_tempfile |
|
964
|
|
|
|
|
|
|
{ |
|
965
|
0
|
|
|
0
|
|
|
my $watchdog = 0 ; |
|
966
|
0
|
|
|
|
|
|
while ($watchdog++ < 1000) { |
|
967
|
0
|
|
|
|
|
|
my $fname = "$TMPDIR/$$." . int(rand(9999999)) ; |
|
968
|
0
|
|
|
|
|
|
my $status = sysopen TEMPF, $fname, O_CREAT | O_WRONLY | O_EXCL, 0666 ; |
|
969
|
0
|
|
|
|
|
|
close(TEMPF) ; |
|
970
|
0
|
0
|
|
|
|
|
if (defined $status) { |
|
971
|
0
|
|
|
|
|
|
push @TEMPFILES, $fname ; |
|
972
|
0
|
|
|
|
|
|
return $fname ; |
|
973
|
|
|
|
|
|
|
} |
|
974
|
|
|
|
|
|
|
} |
|
975
|
0
|
|
|
|
|
|
die "couldn't create a temporary file\n" ; |
|
976
|
|
|
|
|
|
|
} |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
END { |
|
979
|
|
|
|
|
|
|
# delete any tempfiles that didn't get deleted. This shouldn't happen. |
|
980
|
1
|
|
|
1
|
|
308
|
foreach my $file (@TEMPFILES) { |
|
981
|
0
|
|
|
|
|
|
unlink $file ; |
|
982
|
|
|
|
|
|
|
} |
|
983
|
|
|
|
|
|
|
} |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
############################################################################### |
|
987
|
|
|
|
|
|
|
# Text::TabTable::Field |
|
988
|
|
|
|
|
|
|
############################################################################### |
|
989
|
|
|
|
|
|
|
package Text::TabTable::Field ; |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
sub new |
|
992
|
|
|
|
|
|
|
{ |
|
993
|
0
|
|
|
0
|
|
|
my ($package, $fieldname) = @_ ; |
|
994
|
0
|
|
|
|
|
|
my $self = { names => [$fieldname] } ; |
|
995
|
0
|
|
|
|
|
|
bless $self, $package ; |
|
996
|
|
|
|
|
|
|
} |
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
sub name |
|
999
|
|
|
|
|
|
|
{ |
|
1000
|
0
|
|
|
0
|
|
|
return $_[0]->{names}->[0] ; |
|
1001
|
|
|
|
|
|
|
} |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
#### |
|
1004
|
|
|
|
|
|
|
# Return all the names for this field. |
|
1005
|
|
|
|
|
|
|
sub names |
|
1006
|
|
|
|
|
|
|
{ |
|
1007
|
0
|
|
|
0
|
|
|
return @{$_[0]->{names}} ; |
|
|
0
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
} |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
sub has_name |
|
1011
|
|
|
|
|
|
|
{ |
|
1012
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1013
|
0
|
|
|
|
|
|
my $name = shift ; |
|
1014
|
0
|
0
|
|
|
|
|
if (grep( $_ eq $name, @{$self->{names}})) { |
|
|
0
|
|
|
|
|
|
|
|
1015
|
0
|
|
|
|
|
|
return 1 ; |
|
1016
|
|
|
|
|
|
|
} else { |
|
1017
|
0
|
|
|
|
|
|
return 0 ; |
|
1018
|
|
|
|
|
|
|
} |
|
1019
|
|
|
|
|
|
|
} |
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
#### |
|
1022
|
|
|
|
|
|
|
# Add an alias name to a field. |
|
1023
|
|
|
|
|
|
|
#### |
|
1024
|
|
|
|
|
|
|
sub add_name |
|
1025
|
|
|
|
|
|
|
{ |
|
1026
|
0
|
|
|
0
|
|
|
my ($self, @names) = @_ ; |
|
1027
|
0
|
|
|
|
|
|
push @{$self->{names}}, @names ; |
|
|
0
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
} |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
#### |
|
1031
|
|
|
|
|
|
|
# sets the name of the field, wiping out all previous aliases. |
|
1032
|
|
|
|
|
|
|
#### |
|
1033
|
|
|
|
|
|
|
sub set_name |
|
1034
|
|
|
|
|
|
|
{ |
|
1035
|
0
|
|
|
0
|
|
|
my ($self, $name) = @_ ; |
|
1036
|
0
|
|
|
|
|
|
$self->{names} = [$name] ; |
|
1037
|
|
|
|
|
|
|
} |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
############################################################################### |
|
1040
|
|
|
|
|
|
|
# Text::TabTable::FieldList |
|
1041
|
|
|
|
|
|
|
# |
|
1042
|
|
|
|
|
|
|
# Represents the list of Fields on a table. |
|
1043
|
|
|
|
|
|
|
############################################################################### |
|
1044
|
|
|
|
|
|
|
package Text::TabTable::FieldList ; |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
1
|
|
|
1
|
|
5
|
use Data::Dumper ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
113
|
|
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
sub new |
|
1049
|
|
|
|
|
|
|
{ |
|
1050
|
0
|
|
|
0
|
|
|
my ($package, @fields) = @_ ; |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
0
|
|
|
|
|
|
bless { fields => \@fields }, $package ; |
|
1053
|
|
|
|
|
|
|
} |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
sub deepcopy |
|
1056
|
|
|
|
|
|
|
{ |
|
1057
|
0
|
|
|
0
|
|
|
my ($self) = @_ ; |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
1
|
|
|
1
|
|
8
|
no strict ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
292
|
|
|
1060
|
0
|
|
|
|
|
|
my $newent = eval Dumper($self) ; |
|
1061
|
|
|
|
|
|
|
} |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
#### |
|
1064
|
|
|
|
|
|
|
# Returns a 1-based column number for the given field, or undef |
|
1065
|
|
|
|
|
|
|
# if not present. |
|
1066
|
|
|
|
|
|
|
#### |
|
1067
|
|
|
|
|
|
|
sub find_colnum |
|
1068
|
|
|
|
|
|
|
{ |
|
1069
|
0
|
|
|
0
|
|
|
my ($self, $fieldname) = @_ ; |
|
1070
|
0
|
|
|
|
|
|
for (my $i = 0 ; $i < @{$self->{fields}} ; $i++) { |
|
|
0
|
|
|
|
|
|
|
|
1071
|
0
|
0
|
|
|
|
|
if ($self->{fields}->[$i]->has_name($fieldname)) { |
|
1072
|
0
|
|
|
|
|
|
return $i+1 ; |
|
1073
|
|
|
|
|
|
|
} |
|
1074
|
|
|
|
|
|
|
} |
|
1075
|
0
|
|
|
|
|
|
return undef ; |
|
1076
|
|
|
|
|
|
|
} |
|
1077
|
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
#### |
|
1079
|
|
|
|
|
|
|
# Return the number of fields. |
|
1080
|
|
|
|
|
|
|
#### |
|
1081
|
|
|
|
|
|
|
sub fieldcount |
|
1082
|
|
|
|
|
|
|
{ |
|
1083
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1084
|
0
|
|
|
|
|
|
return scalar(@{$self->{fields}}) ; |
|
|
0
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
} |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
#### |
|
1088
|
|
|
|
|
|
|
# Return field names as a tab-delimited string. |
|
1089
|
|
|
|
|
|
|
#### |
|
1090
|
|
|
|
|
|
|
sub as_string |
|
1091
|
|
|
|
|
|
|
{ |
|
1092
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1093
|
0
|
|
|
|
|
|
my @names = map {$_->name} @{$self->{fields}} ; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
1094
|
0
|
|
|
|
|
|
return join("\t", @names) ; |
|
1095
|
|
|
|
|
|
|
} |
|
1096
|
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
sub fields |
|
1098
|
|
|
|
|
|
|
{ |
|
1099
|
0
|
|
|
0
|
|
|
return @{$_[0]->{fields}} ; |
|
|
0
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
} |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
############################################################################### |
|
1103
|
|
|
|
|
|
|
# Text::TabTable::Iterator |
|
1104
|
|
|
|
|
|
|
# Text::TabTable::Iterator::Unescaping |
|
1105
|
|
|
|
|
|
|
# |
|
1106
|
|
|
|
|
|
|
# A thing that returns one row at a time from a table. |
|
1107
|
|
|
|
|
|
|
# The ::Unescaping version will run Text::TabTable::Unescape on all the |
|
1108
|
|
|
|
|
|
|
# data first. |
|
1109
|
|
|
|
|
|
|
############################################################################### |
|
1110
|
|
|
|
|
|
|
package Text::TabTable::Iterator ; |
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
@Text::TabTable::Iterator::Unescaping::ISA = ('MP3Com::TabTable::Iterator') ; |
|
1113
|
|
|
|
|
|
|
|
|
1114
|
1
|
|
|
1
|
|
6
|
use strict ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
33
|
|
|
1115
|
1
|
|
|
1
|
|
5
|
use Carp ; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
355
|
|
|
1116
|
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
sub new |
|
1118
|
|
|
|
|
|
|
{ |
|
1119
|
0
|
|
|
0
|
|
|
my ($package, $table) = @_ ; |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
0
|
|
|
|
|
|
require IO::File ; |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
0
|
|
|
|
|
|
my %fieldloc ; |
|
1124
|
0
|
|
|
|
|
|
$table->_build_fieldloc(\%fieldloc) ; |
|
1125
|
|
|
|
|
|
|
|
|
1126
|
0
|
|
0
|
|
|
|
my $fh = IO::File->new("<$table->{filename}") || croak ; |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
0
|
|
|
|
|
|
my $self = { |
|
1129
|
|
|
|
|
|
|
fieldloc => \%fieldloc, |
|
1130
|
|
|
|
|
|
|
fh => $fh |
|
1131
|
|
|
|
|
|
|
} ; |
|
1132
|
0
|
|
|
|
|
|
bless $self, $package ; |
|
1133
|
|
|
|
|
|
|
} |
|
1134
|
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
sub next |
|
1136
|
|
|
|
|
|
|
{ |
|
1137
|
0
|
|
|
0
|
|
|
my ($self) = @_ ; |
|
1138
|
0
|
|
|
|
|
|
my $line = $self->{fh}->getline() ; |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
0
|
0
|
|
|
|
|
if (!$line) { |
|
1141
|
0
|
|
|
|
|
|
return undef ; |
|
1142
|
0
|
|
|
|
|
|
delete $self->{fh} ; |
|
1143
|
|
|
|
|
|
|
} |
|
1144
|
0
|
|
|
|
|
|
chomp $line ; |
|
1145
|
0
|
|
|
|
|
|
my @values = split(/\t/, $line, -1) ; |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
0
|
|
|
|
|
|
return bless([$self->{fieldloc}, \@values], 'Text::TabTable::DataRow') ; |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
sub Text::TabTable::Iterator::Unescaping::next |
|
1151
|
|
|
|
|
|
|
{ |
|
1152
|
0
|
|
|
0
|
|
|
my ($self) = @_ ; |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
# get a row and unescape the data in it. |
|
1155
|
|
|
|
|
|
|
|
|
1156
|
0
|
|
|
|
|
|
my $row = $self->Text::TabTable::Iterator::next() ; |
|
1157
|
0
|
0
|
|
|
|
|
return undef if !$row ; |
|
1158
|
|
|
|
|
|
|
|
|
1159
|
0
|
|
|
|
|
|
foreach my $val (@{$row->[1]}) { |
|
|
0
|
|
|
|
|
|
|
|
1160
|
0
|
|
|
|
|
|
$val = Text::TabTable::unescape($val) ; |
|
1161
|
|
|
|
|
|
|
} |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
0
|
|
|
|
|
|
return $row ; |
|
1164
|
|
|
|
|
|
|
} |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
############################################################################### |
|
1167
|
|
|
|
|
|
|
# Text::TabTable::DataRow |
|
1168
|
|
|
|
|
|
|
# |
|
1169
|
|
|
|
|
|
|
# Represents a row of data from a TabTable. |
|
1170
|
|
|
|
|
|
|
############################################################################### |
|
1171
|
|
|
|
|
|
|
package Text::TabTable::DataRow ; |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
1
|
|
|
1
|
|
5
|
use vars qw($AUTOLOAD) ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
55
|
|
|
1174
|
1
|
|
|
1
|
|
4
|
use Carp ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
44
|
|
|
1175
|
|
|
|
|
|
|
|
|
1176
|
1
|
|
|
1
|
|
5
|
use strict ; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
300
|
|
|
1177
|
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
# This constructor is not actually used by select(); it blesses the |
|
1179
|
|
|
|
|
|
|
# right structure itself for speed purposes. |
|
1180
|
|
|
|
|
|
|
sub new |
|
1181
|
|
|
|
|
|
|
{ |
|
1182
|
0
|
|
|
0
|
|
|
my ($package, $name2colhash, $values) = @_ ; |
|
1183
|
0
|
|
|
|
|
|
bless [$name2colhash, $values], $package ; |
|
1184
|
|
|
|
|
|
|
} |
|
1185
|
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
sub getvalue |
|
1187
|
|
|
|
|
|
|
{ |
|
1188
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1189
|
0
|
|
|
|
|
|
my $name = shift ; |
|
1190
|
0
|
|
|
|
|
|
return $self->[1][ $self->[0]{$name} ] ; |
|
1191
|
|
|
|
|
|
|
} |
|
1192
|
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
sub setvalue |
|
1194
|
|
|
|
|
|
|
{ |
|
1195
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1196
|
0
|
|
|
|
|
|
my $name = shift ; |
|
1197
|
0
|
|
|
|
|
|
my $newval = shift ; |
|
1198
|
0
|
|
|
|
|
|
$self->[1][$self->[0]{$name}] = $newval ; |
|
1199
|
|
|
|
|
|
|
} |
|
1200
|
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
# to save work for autoload. |
|
1202
|
0
|
|
|
0
|
|
|
sub DESTROY {} ; |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
#### |
|
1205
|
|
|
|
|
|
|
# implements field-named methods for getting values. |
|
1206
|
|
|
|
|
|
|
#### |
|
1207
|
|
|
|
|
|
|
sub AUTOLOAD |
|
1208
|
|
|
|
|
|
|
{ |
|
1209
|
0
|
|
|
0
|
|
|
my $self = shift ; |
|
1210
|
|
|
|
|
|
|
|
|
1211
|
0
|
|
|
|
|
|
my $name = $AUTOLOAD ; |
|
1212
|
0
|
|
|
|
|
|
$name =~ s/.*:// ; |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
0
|
0
|
|
|
|
|
if (!exists $self->[0]{$name}) { |
|
1215
|
0
|
|
|
|
|
|
croak "No $name field in table" ; |
|
1216
|
|
|
|
|
|
|
} |
|
1217
|
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
# create a function to calculate it. |
|
1219
|
0
|
|
|
|
|
|
eval <
|
|
1220
|
|
|
|
|
|
|
sub $name { |
|
1221
|
|
|
|
|
|
|
return \$_[0]->[1][ \$_[0]->[0]{'$name'} ] ; |
|
1222
|
|
|
|
|
|
|
} |
|
1223
|
|
|
|
|
|
|
EOT |
|
1224
|
|
|
|
|
|
|
|
|
1225
|
0
|
|
|
|
|
|
return $self->$name() ; |
|
1226
|
|
|
|
|
|
|
} |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
1; |