| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# $Id$ |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# file::csv Brik |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
package Metabrik::File::Csv; |
|
7
|
2
|
|
|
2
|
|
1235
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
63
|
|
|
8
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
50
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
11
|
use base qw(Metabrik); |
|
|
2
|
|
|
|
|
26
|
|
|
|
2
|
|
|
|
|
6270
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
sub brik_properties { |
|
13
|
|
|
|
|
|
|
return { |
|
14
|
0
|
|
|
0
|
1
|
|
revision => '$Revision$', |
|
15
|
|
|
|
|
|
|
tags => [ qw(unstable) ], |
|
16
|
|
|
|
|
|
|
author => 'GomoR ', |
|
17
|
|
|
|
|
|
|
license => 'http://opensource.org/licenses/BSD-3-Clause', |
|
18
|
|
|
|
|
|
|
attributes => { |
|
19
|
|
|
|
|
|
|
input => [ qw(file) ], |
|
20
|
|
|
|
|
|
|
output => [ qw(file) ], |
|
21
|
|
|
|
|
|
|
first_line_is_header => [ qw(0|1) ], |
|
22
|
|
|
|
|
|
|
separator => [ qw(character) ], |
|
23
|
|
|
|
|
|
|
escape => [ qw(character) ], |
|
24
|
|
|
|
|
|
|
header => [ qw($column_header_list) ], |
|
25
|
|
|
|
|
|
|
encoding => [ qw(utf8|ascii) ], |
|
26
|
|
|
|
|
|
|
overwrite => [ qw(0|1) ], |
|
27
|
|
|
|
|
|
|
append => [ qw(0|1) ], |
|
28
|
|
|
|
|
|
|
write_header => [ qw(0|1) ], |
|
29
|
|
|
|
|
|
|
use_quoting => [ qw(0|1) ], |
|
30
|
|
|
|
|
|
|
use_locking => [ qw(0|1) ], |
|
31
|
|
|
|
|
|
|
unbuffered => [ qw(0|1) ], |
|
32
|
|
|
|
|
|
|
encoded_fields => [ qw(fields) ], |
|
33
|
|
|
|
|
|
|
object_fields => [ qw(fields) ], |
|
34
|
|
|
|
|
|
|
_csv => [ qw(INTERNAL) ], |
|
35
|
|
|
|
|
|
|
_fd => [ qw(INTERNAL) ], |
|
36
|
|
|
|
|
|
|
_sb => [ qw(INTERNAL) ], |
|
37
|
|
|
|
|
|
|
_sc => [ qw(INTERNAL) ], |
|
38
|
|
|
|
|
|
|
}, |
|
39
|
|
|
|
|
|
|
attributes_default => { |
|
40
|
|
|
|
|
|
|
first_line_is_header => 1, |
|
41
|
|
|
|
|
|
|
separator => ',', |
|
42
|
|
|
|
|
|
|
escape => '"', |
|
43
|
|
|
|
|
|
|
encoding => 'utf8', |
|
44
|
|
|
|
|
|
|
overwrite => 0, |
|
45
|
|
|
|
|
|
|
append => 1, |
|
46
|
|
|
|
|
|
|
write_header => 1, |
|
47
|
|
|
|
|
|
|
use_quoting => 0, |
|
48
|
|
|
|
|
|
|
use_locking => 0, |
|
49
|
|
|
|
|
|
|
unbuffered => 0, |
|
50
|
|
|
|
|
|
|
}, |
|
51
|
|
|
|
|
|
|
commands => { |
|
52
|
|
|
|
|
|
|
read => [ qw(input_file|OPTIONAL) ], |
|
53
|
|
|
|
|
|
|
write => [ qw(csv_struct output_file|OPTIONAL) ], |
|
54
|
|
|
|
|
|
|
get_column_values => [ qw($data column_name|column_int) ], |
|
55
|
|
|
|
|
|
|
read_next => [ qw(input_file|OPTIONAL) ], |
|
56
|
|
|
|
|
|
|
}, |
|
57
|
|
|
|
|
|
|
require_modules => { |
|
58
|
|
|
|
|
|
|
'Data::Dump' => [ ], |
|
59
|
|
|
|
|
|
|
'Text::CSV_XS' => [ ], |
|
60
|
|
|
|
|
|
|
'Metabrik::File::Read' => [ ], |
|
61
|
|
|
|
|
|
|
'Metabrik::File::Write' => [ ], |
|
62
|
|
|
|
|
|
|
'Metabrik::String::Base64' => [ ], |
|
63
|
|
|
|
|
|
|
'Metabrik::String::Compress' => [ ], |
|
64
|
|
|
|
|
|
|
}, |
|
65
|
|
|
|
|
|
|
}; |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub brik_init { |
|
69
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
70
|
|
|
|
|
|
|
|
|
71
|
0
|
0
|
|
|
|
|
my $sb = Metabrik::String::Base64->new_from_brik_init($self) or return; |
|
72
|
0
|
0
|
|
|
|
|
my $sc = Metabrik::String::Compress->new_from_brik_init($self) or return; |
|
73
|
0
|
|
|
|
|
|
$self->_sb($sb); |
|
74
|
0
|
|
|
|
|
|
$self->_sc($sc); |
|
75
|
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
return $self->SUPER::brik_init; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub read { |
|
80
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
81
|
0
|
|
|
|
|
|
my ($input) = @_; |
|
82
|
|
|
|
|
|
|
|
|
83
|
0
|
|
0
|
|
|
|
$input ||= $self->input; |
|
84
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('read', $input) or return; |
|
85
|
0
|
0
|
|
|
|
|
$self->brik_help_run_file_not_found('read', $input) or return; |
|
86
|
|
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
|
my $csv = Text::CSV_XS->new({ |
|
88
|
|
|
|
|
|
|
binary => 1, |
|
89
|
|
|
|
|
|
|
sep_char => $self->separator, |
|
90
|
|
|
|
|
|
|
allow_loose_quotes => 1, |
|
91
|
|
|
|
|
|
|
allow_loose_escapes => 1, |
|
92
|
|
|
|
|
|
|
escape_char => $self->escape, |
|
93
|
|
|
|
|
|
|
}) or return $self->log->error('read: Text::CSV_XS new failed'); |
|
94
|
|
|
|
|
|
|
|
|
95
|
0
|
0
|
|
|
|
|
my $fr = Metabrik::File::Read->new_from_brik_init($self) or return; |
|
96
|
0
|
|
|
|
|
|
$fr->encoding($self->encoding); |
|
97
|
0
|
0
|
|
|
|
|
my $fd = $fr->open($input) or return; |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# When some content is too complex to be stored as a standard CSV cell, |
|
100
|
|
|
|
|
|
|
# we should encode it as base64. |
|
101
|
0
|
|
|
|
|
|
my $sb = $self->_sb; |
|
102
|
0
|
|
|
|
|
|
my $sc = $self->_sc; |
|
103
|
0
|
|
|
|
|
|
my $encoded_fields = $self->encoded_fields; |
|
104
|
0
|
0
|
|
|
|
|
if (defined($encoded_fields)) { |
|
105
|
0
|
|
|
|
|
|
my $str = join(',', @$encoded_fields); |
|
106
|
0
|
|
|
|
|
|
$encoded_fields = { map { $_ => 1 } @$encoded_fields }; |
|
|
0
|
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
$self->log->debug("read: will decode field(s) [$str] in encoded format"); |
|
108
|
|
|
|
|
|
|
} |
|
109
|
0
|
|
|
|
|
|
my $object_fields = $self->object_fields; |
|
110
|
0
|
0
|
|
|
|
|
if (defined($object_fields)) { |
|
111
|
0
|
|
|
|
|
|
my $str = join(',', @$object_fields); |
|
112
|
0
|
|
|
|
|
|
$object_fields = { map { $_ => 1 } @$object_fields }; |
|
|
0
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
$self->log->debug("read: will decode field(s) [$str] in object format"); |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
0
|
|
|
|
|
|
my $object_re = qr/^OBJECT:(.*)$/; |
|
117
|
0
|
|
|
|
|
|
my $base64_re = qr/^BASE64:(.*)$/; # Keep for backward compat. |
|
118
|
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my $sep = $self->separator; |
|
120
|
0
|
|
|
|
|
|
my $headers; |
|
121
|
|
|
|
|
|
|
my $count; |
|
122
|
0
|
|
|
|
|
|
my $first_line = 1; |
|
123
|
0
|
|
|
|
|
|
my @rows = (); |
|
124
|
0
|
|
|
|
|
|
while (my $row = $csv->getline($fd)) { |
|
125
|
|
|
|
|
|
|
# The CSV file has a header, we output an array of hashes |
|
126
|
0
|
0
|
|
|
|
|
if ($self->first_line_is_header) { |
|
127
|
0
|
0
|
|
|
|
|
if ($first_line) { # This is first line |
|
128
|
0
|
|
|
|
|
|
$headers = $row; |
|
129
|
0
|
|
|
|
|
|
$count = scalar @$row - 1; |
|
130
|
0
|
|
|
|
|
|
$first_line = 0; |
|
131
|
0
|
|
|
|
|
|
$self->header($headers); |
|
132
|
0
|
|
|
|
|
|
next; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
my $h; |
|
136
|
|
|
|
|
|
|
# We have to decode some fields |
|
137
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields || $object_fields) { |
|
138
|
0
|
|
|
|
|
|
for (0..$count) { |
|
139
|
0
|
|
|
|
|
|
my $k = $headers->[$_]; |
|
140
|
0
|
|
|
|
|
|
my $v = $row->[$_]; |
|
141
|
0
|
0
|
|
|
|
|
next unless defined($v); |
|
142
|
|
|
|
|
|
|
# Decode only if it has been asked and the value is not empty. |
|
143
|
|
|
|
|
|
|
# Decode the encode format |
|
144
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields && exists($encoded_fields->{$k}) && length($v)) { |
|
|
|
|
0
|
|
|
|
|
|
145
|
0
|
|
|
|
|
|
my $decoded = $sb->decode($v); |
|
146
|
0
|
0
|
|
|
|
|
if (! defined($decoded)) { |
|
147
|
0
|
|
|
|
|
|
$self->log->error("read: decode encoded format failed, ". |
|
148
|
|
|
|
|
|
|
"skipping data with length [".length($v)."]"); |
|
149
|
0
|
|
|
|
|
|
next; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
0
|
|
|
|
|
|
my $gunzipped = $sc->gunzip($decoded); |
|
152
|
0
|
0
|
|
|
|
|
if (! defined($gunzipped)) { |
|
153
|
0
|
|
|
|
|
|
$self->log->error("read: gunzip failed, skipping ". |
|
154
|
|
|
|
|
|
|
"decoded data with length [".length($decoded)."]"); |
|
155
|
0
|
|
|
|
|
|
next; |
|
156
|
|
|
|
|
|
|
} |
|
157
|
0
|
|
|
|
|
|
$v = $$gunzipped; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
# Decode the object format |
|
160
|
0
|
0
|
0
|
|
|
|
if ($object_fields && exists($object_fields->{$k}) && length($v) |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
161
|
|
|
|
|
|
|
&& ($v =~ $object_re || $v =~ $base64_re)) { |
|
162
|
0
|
|
|
|
|
|
my $decoded = $sb->decode($1); |
|
163
|
0
|
0
|
|
|
|
|
if (! defined($decoded)) { |
|
164
|
0
|
|
|
|
|
|
$self->log->error("read: decode object format failed, ". |
|
165
|
|
|
|
|
|
|
"skipping data with length [".length($v)."]"); |
|
166
|
0
|
|
|
|
|
|
next; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
0
|
|
|
|
|
|
$v = eval($decoded); |
|
169
|
|
|
|
|
|
|
} |
|
170
|
0
|
|
|
|
|
|
$h->{$k} = $v; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
# Or not. |
|
174
|
|
|
|
|
|
|
else { |
|
175
|
0
|
|
|
|
|
|
for (0..$count) { |
|
176
|
0
|
|
|
|
|
|
$h->{$headers->[$_]} = $row->[$_]; |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
} |
|
179
|
0
|
|
|
|
|
|
push @rows, $h; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
# The CSV has no header, we output an array of arrays |
|
182
|
|
|
|
|
|
|
else { |
|
183
|
0
|
|
|
|
|
|
push @rows, $row; |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
0
|
0
|
|
|
|
|
if (! $csv->eof) { |
|
188
|
0
|
|
|
|
|
|
my $error_str = "".$csv->error_diag(); |
|
189
|
0
|
|
|
|
|
|
$self->log->error("read: incomplete: error [$error_str]"); |
|
190
|
0
|
|
|
|
|
|
return \@rows; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
|
|
193
|
0
|
|
|
|
|
|
$fr->close; |
|
194
|
|
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
return \@rows; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# |
|
199
|
|
|
|
|
|
|
# We only handle ARRAY of HASHes format (aoh) for writing |
|
200
|
|
|
|
|
|
|
# |
|
201
|
|
|
|
|
|
|
sub write { |
|
202
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
203
|
0
|
|
|
|
|
|
my ($csv_struct, $output) = @_; |
|
204
|
|
|
|
|
|
|
|
|
205
|
0
|
|
0
|
|
|
|
$output ||= $self->output; |
|
206
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('write', $csv_struct) or return; |
|
207
|
0
|
0
|
|
|
|
|
$self->brik_help_run_invalid_arg('write', $csv_struct, 'ARRAY') or return; |
|
208
|
0
|
0
|
|
|
|
|
$self->brik_help_run_empty_array_arg('write', $csv_struct, 'ARRAY') or return; |
|
209
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('write', $output) or return; |
|
210
|
|
|
|
|
|
|
|
|
211
|
0
|
0
|
|
|
|
|
if (ref($csv_struct->[0]) ne 'HASH') { |
|
212
|
0
|
|
|
|
|
|
return $self->log->error("write: csv structure does not contain HASHes"); |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
0
|
0
|
|
|
|
|
my $fw = Metabrik::File::Write->new_from_brik_init($self) or return; |
|
216
|
0
|
|
|
|
|
|
$fw->output($output); |
|
217
|
0
|
|
|
|
|
|
$fw->encoding($self->encoding); |
|
218
|
0
|
|
|
|
|
|
$fw->overwrite($self->overwrite); |
|
219
|
0
|
|
|
|
|
|
$fw->append($self->append); |
|
220
|
0
|
|
|
|
|
|
$fw->use_locking($self->use_locking); |
|
221
|
0
|
|
|
|
|
|
$fw->unbuffered($self->unbuffered); |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# When some content is too complex to be stored as a standard CSV cell, |
|
224
|
|
|
|
|
|
|
# we should encode it as base64. |
|
225
|
0
|
|
|
|
|
|
my $sb = $self->_sb; |
|
226
|
0
|
|
|
|
|
|
my $sc = $self->_sc; |
|
227
|
0
|
|
|
|
|
|
my $encoded_fields = $self->encoded_fields; |
|
228
|
0
|
0
|
|
|
|
|
if (defined($encoded_fields)) { |
|
229
|
0
|
|
|
|
|
|
my $str = join(',', @$encoded_fields); |
|
230
|
0
|
|
|
|
|
|
$encoded_fields = { map { $_ => 1 } @$encoded_fields }; |
|
|
0
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
$self->log->debug("write: will encode field(s) [$str] in encoded format"); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
0
|
|
|
|
|
|
my $object_fields = $self->object_fields; |
|
234
|
0
|
0
|
|
|
|
|
if (defined($object_fields)) { |
|
235
|
0
|
|
|
|
|
|
my $str = join(',', @$object_fields); |
|
236
|
0
|
|
|
|
|
|
$object_fields = { map { $_ => 1 } @$object_fields }; |
|
|
0
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
$self->log->debug("write: will encode field(s) [$str] in object format"); |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# |
|
241
|
|
|
|
|
|
|
# Set header ordering |
|
242
|
|
|
|
|
|
|
# |
|
243
|
0
|
|
|
|
|
|
my %order = (); |
|
244
|
0
|
|
|
|
|
|
my @header = (); |
|
245
|
|
|
|
|
|
|
# Order headers either by using user provided one or our own default ordering. |
|
246
|
0
|
0
|
|
|
|
|
if ($self->header) { |
|
247
|
0
|
|
|
|
|
|
@header = @{$self->header}; |
|
|
0
|
|
|
|
|
|
|
|
248
|
0
|
|
|
|
|
|
my $idx = 0; |
|
249
|
0
|
|
|
|
|
|
for my $k (@header) { |
|
250
|
0
|
|
|
|
|
|
$order{$k} = $idx; |
|
251
|
0
|
|
|
|
|
|
$idx++; |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
# If user didn't provide her own header, we use first element from struct. |
|
255
|
|
|
|
|
|
|
else { |
|
256
|
0
|
|
|
|
|
|
my $first = $csv_struct->[0]; |
|
257
|
0
|
|
|
|
|
|
@header = sort { $a cmp $b } keys %$first; |
|
|
0
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
my $idx = 0; |
|
259
|
0
|
|
|
|
|
|
for my $k (@header) { |
|
260
|
0
|
|
|
|
|
|
$order{$k} = $idx; |
|
261
|
0
|
|
|
|
|
|
$idx++; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
my $header_count = @header; |
|
266
|
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my $is_new_file = (! -f $output); |
|
268
|
0
|
0
|
|
|
|
|
my $fd = $fw->open or return; |
|
269
|
|
|
|
|
|
|
|
|
270
|
0
|
|
|
|
|
|
my $written = ''; |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Write header if this is a new file and user asked for it. |
|
273
|
0
|
0
|
0
|
|
|
|
if ($self->write_header && ($is_new_file || $self->overwrite)) { |
|
|
|
|
0
|
|
|
|
|
|
274
|
0
|
|
|
|
|
|
my $data = join($self->separator, @header)."\n"; |
|
275
|
0
|
|
|
|
|
|
my $r = $fw->write($data); |
|
276
|
0
|
0
|
|
|
|
|
if (! defined($r)) { |
|
277
|
0
|
|
|
|
|
|
return; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
0
|
|
|
|
|
|
$written .= $data; |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
|
|
282
|
0
|
|
|
|
|
|
my $separator = $self->separator; |
|
283
|
0
|
|
|
|
|
|
my $escape = $self->escape; |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
local $Data::Dump::INDENT = ""; # No indentation shorten length |
|
286
|
0
|
|
|
|
|
|
local $Data::Dump::TRY_BASE64 = 0; # Never encode in base64 |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# Write the structure to file. |
|
289
|
0
|
|
|
|
|
|
for my $this (@$csv_struct) { |
|
290
|
0
|
|
|
|
|
|
my @fields = (); |
|
291
|
|
|
|
|
|
|
# We have to decode some fields |
|
292
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields || $object_fields) { |
|
293
|
0
|
|
|
|
|
|
for my $key (keys %$this) { |
|
294
|
|
|
|
|
|
|
# We may have some unwanted data in this HASH, we skip it. |
|
295
|
0
|
0
|
|
|
|
|
next if (! defined($order{$key})); |
|
296
|
0
|
|
|
|
|
|
my $k = $key; |
|
297
|
0
|
|
|
|
|
|
my $v = $this->{$key}; |
|
298
|
0
|
0
|
|
|
|
|
next unless defined($v); |
|
299
|
|
|
|
|
|
|
# Encode only if it has been asked and the value is not empty. |
|
300
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields && exists($encoded_fields->{$k}) && length($v)) { |
|
|
|
|
0
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# Gzip to handle UTF-like encodings, cause Base64 does not like that. |
|
302
|
0
|
|
|
|
|
|
my $gzipped = $sc->gzip($v); |
|
303
|
0
|
0
|
|
|
|
|
if (! defined($gzipped)) { |
|
304
|
0
|
|
|
|
|
|
$self->log->error("write: gzip failed, skipping"); |
|
305
|
0
|
|
|
|
|
|
next; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
0
|
|
|
|
|
|
$v = $sb->encode($$gzipped); |
|
308
|
0
|
0
|
|
|
|
|
if (! defined($v)) { |
|
309
|
0
|
|
|
|
|
|
$self->log->error("write: encode in encoded format failed, skipping"); |
|
310
|
0
|
|
|
|
|
|
next; |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
# Encode only if it has been asked and the value is not empty. |
|
314
|
0
|
0
|
0
|
|
|
|
if ($object_fields && exists($object_fields->{$k}) && length($v)) { |
|
|
|
|
0
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Encode ARRAYs and HASHes only if they are not empty. |
|
316
|
|
|
|
|
|
|
# Do not encode simple strings. |
|
317
|
0
|
0
|
0
|
|
|
|
if (ref($v) eq 'ARRAY' && @$v > 0 |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
318
|
|
|
|
|
|
|
|| ref($v) eq 'HASH' && keys %$v > 0) { |
|
319
|
0
|
|
|
|
|
|
$v = Data::Dump::dump($v); $v =~ s{\n}{}g; |
|
|
0
|
|
|
|
|
|
|
|
320
|
0
|
|
|
|
|
|
$v = 'OBJECT:'.$sb->encode($v); |
|
321
|
0
|
0
|
|
|
|
|
if (! defined($v)) { |
|
322
|
0
|
|
|
|
|
|
$self->log->error("write: encode in object format failed, skipping"); |
|
323
|
0
|
|
|
|
|
|
next; |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
# If this is a simple string, we do not encode at all. |
|
327
|
|
|
|
|
|
|
elsif (ref($v) eq '' && length($v)) { |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
# And for empty objects, we set them to empty string. |
|
330
|
|
|
|
|
|
|
else { |
|
331
|
0
|
|
|
|
|
|
$v = ""; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
} |
|
334
|
0
|
|
|
|
|
|
$fields[$order{$key}] = $v; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
# Or not. |
|
338
|
|
|
|
|
|
|
else { |
|
339
|
0
|
|
|
|
|
|
for my $key (keys %$this) { |
|
340
|
|
|
|
|
|
|
# We may have some unwanted data in this HASH, we skip it. |
|
341
|
0
|
0
|
|
|
|
|
next if (! defined($order{$key})); |
|
342
|
0
|
|
|
|
|
|
$fields[$order{$key}] = $this->{$key}; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
|
|
346
|
0
|
0
|
|
|
|
|
@fields = map { defined($_) ? $_ : '' } @fields; |
|
|
0
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# If this entry has less fields than the header, we add null entries. |
|
349
|
0
|
|
|
|
|
|
my $field_count = @fields; |
|
350
|
0
|
0
|
|
|
|
|
if ($field_count < $header_count) { |
|
351
|
0
|
|
|
|
|
|
my $diff = $header_count - $field_count; |
|
352
|
0
|
|
|
|
|
|
for (1..$diff) { |
|
353
|
0
|
|
|
|
|
|
push @fields, ''; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
} |
|
356
|
|
|
|
|
|
|
|
|
357
|
0
|
0
|
|
|
|
|
if ($self->use_quoting) { |
|
358
|
0
|
|
|
|
|
|
for (@fields) { |
|
359
|
0
|
|
|
|
|
|
s/"/${escape}"/g; |
|
360
|
0
|
|
|
|
|
|
$_ = '"'.$_.'"'; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
} |
|
363
|
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
my $data = join($separator, @fields)."\n"; |
|
365
|
|
|
|
|
|
|
|
|
366
|
0
|
|
|
|
|
|
my $r = $fw->write($data); |
|
367
|
0
|
0
|
|
|
|
|
if (! defined($r)) { |
|
368
|
0
|
|
|
|
|
|
next; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
0
|
|
|
|
|
|
$written .= $data; |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
|
$fw->close; |
|
375
|
|
|
|
|
|
|
|
|
376
|
0
|
0
|
|
|
|
|
if (! length($written)) { |
|
377
|
0
|
|
|
|
|
|
return $self->log->error("write: nothing to write"); |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
0
|
|
|
|
|
|
return $written; |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
sub get_column_values { |
|
384
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
385
|
0
|
|
|
|
|
|
my ($data, $column) = @_; |
|
386
|
|
|
|
|
|
|
|
|
387
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('get_column_values', $data) or return; |
|
388
|
0
|
0
|
|
|
|
|
$self->brik_help_run_invalid_arg('get_column_values', $data, 'ARRAY') or return; |
|
389
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('get_column_values', $column) or return; |
|
390
|
|
|
|
|
|
|
|
|
391
|
0
|
|
|
|
|
|
my @results = (); |
|
392
|
|
|
|
|
|
|
# CSV structure is an ARRAYREF of HASHREFs |
|
393
|
0
|
0
|
|
|
|
|
if ($self->first_line_is_header) { |
|
|
|
0
|
|
|
|
|
|
|
394
|
0
|
0
|
|
|
|
|
if (@{$self->header} == 0) { |
|
|
0
|
|
|
|
|
|
|
|
395
|
0
|
|
|
|
|
|
return $self->log->error("get_column_values: no CSV header found"); |
|
396
|
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
|
|
398
|
0
|
|
|
|
|
|
for my $row (@$data) { |
|
399
|
0
|
0
|
|
|
|
|
if (ref($row) ne 'HASH') { |
|
400
|
0
|
|
|
|
|
|
$self->log->warning("get_column_values: row is not a HASHREF"); |
|
401
|
0
|
|
|
|
|
|
next; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
0
|
0
|
|
|
|
|
if (exists($row->{$column})) { |
|
404
|
0
|
|
|
|
|
|
push @results, $row->{$column}; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
} |
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
# CSV structure is an ARRAYREF of ARRAYREFs |
|
409
|
|
|
|
|
|
|
elsif ($column =~ m{^\d+$}) { |
|
410
|
0
|
|
|
|
|
|
for my $row (@$data) { |
|
411
|
0
|
0
|
|
|
|
|
if (ref($row) ne 'ARRAY') { |
|
412
|
0
|
|
|
|
|
|
$self->log->warning("get_column_values: row is not an ARRAYREF"); |
|
413
|
0
|
|
|
|
|
|
next; |
|
414
|
|
|
|
|
|
|
} |
|
415
|
0
|
0
|
|
|
|
|
if (exists($row->[$column])) { |
|
416
|
0
|
|
|
|
|
|
push @results, $row->[$column]; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
return \@results; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub read_next { |
|
425
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
426
|
0
|
|
|
|
|
|
my ($input) = @_; |
|
427
|
|
|
|
|
|
|
|
|
428
|
0
|
|
0
|
|
|
|
$input ||= $self->input; |
|
429
|
0
|
0
|
|
|
|
|
$self->brik_help_run_undef_arg('read_next', $input) or return; |
|
430
|
0
|
0
|
|
|
|
|
$self->brik_help_run_file_not_found('read_next', $input) or return; |
|
431
|
|
|
|
|
|
|
|
|
432
|
0
|
|
|
|
|
|
my $csv = $self->_csv; |
|
433
|
0
|
|
|
|
|
|
my $fd = $self->_fd; |
|
434
|
0
|
0
|
|
|
|
|
if (! defined($csv)) { |
|
435
|
0
|
|
|
|
|
|
$self->log->debug('read_next: first call, create _csv'); |
|
436
|
0
|
0
|
|
|
|
|
$csv = Text::CSV_XS->new({ |
|
437
|
|
|
|
|
|
|
binary => 1, |
|
438
|
|
|
|
|
|
|
sep_char => $self->separator, |
|
439
|
|
|
|
|
|
|
allow_loose_quotes => 1, |
|
440
|
|
|
|
|
|
|
allow_loose_escapes => 1, |
|
441
|
|
|
|
|
|
|
escape_char => $self->escape, |
|
442
|
|
|
|
|
|
|
}) or return $self->log->error('read_next: Text::CSV_XS new failed'); |
|
443
|
0
|
|
|
|
|
|
$self->_csv($csv); |
|
444
|
|
|
|
|
|
|
|
|
445
|
0
|
0
|
|
|
|
|
my $fr = Metabrik::File::Read->new_from_brik_init($self) or return; |
|
446
|
0
|
|
|
|
|
|
$fr->encoding($self->encoding); |
|
447
|
0
|
0
|
|
|
|
|
$fd = $fr->open($input) or return; |
|
448
|
0
|
|
|
|
|
|
$self->_fd($fd); |
|
449
|
|
|
|
|
|
|
|
|
450
|
0
|
0
|
|
|
|
|
if ($self->first_line_is_header) { |
|
451
|
0
|
|
|
|
|
|
my $header = $csv->getline($fd); |
|
452
|
0
|
|
|
|
|
|
$self->header($header); |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
} |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# When some content is too complex to be stored as a standard CSV cell, |
|
457
|
|
|
|
|
|
|
# we should encode it as base64. |
|
458
|
0
|
|
|
|
|
|
my $sb = $self->_sb; |
|
459
|
0
|
|
|
|
|
|
my $sc = $self->_sc; |
|
460
|
0
|
|
|
|
|
|
my $encoded_fields = $self->encoded_fields; |
|
461
|
0
|
0
|
|
|
|
|
if (defined($encoded_fields)) { |
|
462
|
0
|
|
|
|
|
|
my $str = join(',', @$encoded_fields); |
|
463
|
0
|
|
|
|
|
|
$encoded_fields = { map { $_ => 1 } @$encoded_fields }; |
|
|
0
|
|
|
|
|
|
|
|
464
|
0
|
|
|
|
|
|
$self->log->debug("read_next: will decode field(s) [$str] in base64"); |
|
465
|
|
|
|
|
|
|
} |
|
466
|
0
|
|
|
|
|
|
my $object_fields = $self->object_fields; |
|
467
|
0
|
0
|
|
|
|
|
if (defined($object_fields)) { |
|
468
|
0
|
|
|
|
|
|
my $str = join(',', @$object_fields); |
|
469
|
0
|
|
|
|
|
|
$object_fields = { map { $_ => 1 } @$object_fields }; |
|
|
0
|
|
|
|
|
|
|
|
470
|
0
|
|
|
|
|
|
$self->log->debug("read_next: will decode field(s) [$str] in object format"); |
|
471
|
|
|
|
|
|
|
} |
|
472
|
|
|
|
|
|
|
|
|
473
|
0
|
|
|
|
|
|
my $object_re = qr/^OBJECT:(.*)$/; |
|
474
|
0
|
|
|
|
|
|
my $base64_re = qr/^BASE64:(.*)$/; # Keep for backward compat. |
|
475
|
|
|
|
|
|
|
|
|
476
|
0
|
|
|
|
|
|
my $row = $csv->getline($fd); |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# If a header is given as an Attribute, we use it to return a HASH |
|
479
|
0
|
|
|
|
|
|
my $header = $self->header; |
|
480
|
0
|
0
|
|
|
|
|
if (defined($header)) { |
|
481
|
0
|
|
|
|
|
|
my $h = {}; |
|
482
|
0
|
|
|
|
|
|
my $i = 0; |
|
483
|
|
|
|
|
|
|
# We have to decode some fields |
|
484
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields || $object_fields) { |
|
485
|
0
|
|
|
|
|
|
for (@$header) { |
|
486
|
0
|
|
|
|
|
|
my $k = $_; |
|
487
|
0
|
|
|
|
|
|
my $v = $row->[$i++]; |
|
488
|
0
|
0
|
|
|
|
|
next unless defined($v); |
|
489
|
|
|
|
|
|
|
# Decode only if it has been asked and the value is not empty. |
|
490
|
|
|
|
|
|
|
# Decode the encode format |
|
491
|
0
|
0
|
0
|
|
|
|
if ($encoded_fields && exists($encoded_fields->{$k}) && length($v)) { |
|
|
|
|
0
|
|
|
|
|
|
492
|
0
|
|
|
|
|
|
my $decoded = $sb->decode($v); |
|
493
|
0
|
0
|
|
|
|
|
if (! defined($decoded)) { |
|
494
|
0
|
|
|
|
|
|
$self->log->error("read_next: decode failed, skipping data with ". |
|
495
|
|
|
|
|
|
|
"with length [".length($v)."]"); |
|
496
|
0
|
|
|
|
|
|
next; |
|
497
|
|
|
|
|
|
|
} |
|
498
|
0
|
|
|
|
|
|
my $gunzipped = $sc->gunzip($decoded); |
|
499
|
0
|
0
|
|
|
|
|
if (! defined($gunzipped)) { |
|
500
|
0
|
|
|
|
|
|
$self->log->error("read_next: gunzip failed, skipping ". |
|
501
|
|
|
|
|
|
|
"decoded data with length [".length($decoded)."]"); |
|
502
|
0
|
|
|
|
|
|
next; |
|
503
|
|
|
|
|
|
|
} |
|
504
|
0
|
|
|
|
|
|
$v = $$gunzipped; |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
# Decode the object format |
|
507
|
0
|
0
|
0
|
|
|
|
if ($object_fields && exists($object_fields->{$k}) && length($v) |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
508
|
|
|
|
|
|
|
&& ($v =~ $object_re || $v =~ $base64_re)) { |
|
509
|
0
|
|
|
|
|
|
my $decoded = $sb->decode($1); |
|
510
|
0
|
0
|
|
|
|
|
if (! defined($decoded)) { |
|
511
|
0
|
|
|
|
|
|
$self->log->error("read_next: decode object format failed, ". |
|
512
|
|
|
|
|
|
|
"skipping data with length [".length($v)."]"); |
|
513
|
0
|
|
|
|
|
|
next; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
0
|
|
|
|
|
|
$v = eval($decoded); |
|
516
|
|
|
|
|
|
|
} |
|
517
|
0
|
|
|
|
|
|
$h->{$k} = $v; |
|
518
|
|
|
|
|
|
|
} |
|
519
|
|
|
|
|
|
|
} |
|
520
|
|
|
|
|
|
|
# Or not. |
|
521
|
|
|
|
|
|
|
else { |
|
522
|
0
|
|
|
|
|
|
for (@$header) { |
|
523
|
0
|
|
|
|
|
|
$h->{$_} = $row->[$i++]; |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
} |
|
526
|
0
|
|
|
|
|
|
$row = $h; |
|
527
|
|
|
|
|
|
|
} |
|
528
|
|
|
|
|
|
|
|
|
529
|
0
|
0
|
|
|
|
|
if ($csv->eof) { |
|
530
|
0
|
|
|
|
|
|
$self->log->debug('read_next: eof reached'); |
|
531
|
0
|
|
|
|
|
|
$self->_fd(undef); |
|
532
|
0
|
|
|
|
|
|
$self->_csv(undef); |
|
533
|
0
|
|
|
|
|
|
return 0; |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
|
|
536
|
0
|
|
|
|
|
|
return $row; |
|
537
|
|
|
|
|
|
|
} |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
1; |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
__END__ |