| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package DataCube::FileSplitter; |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
2595
|
use lib '..'; |
|
|
1
|
|
|
|
|
868
|
|
|
|
1
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
131
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
27
|
|
|
9
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
30
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use Fcntl; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
337
|
|
|
12
|
1
|
|
|
1
|
|
5
|
use URI::file; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
36
|
|
|
13
|
1
|
|
|
1
|
|
6
|
use Digest::MD5; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
14
|
1
|
|
|
1
|
|
6
|
use Time::HiRes; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
11
|
|
|
15
|
1
|
|
|
1
|
|
113
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
58
|
|
|
16
|
1
|
|
|
1
|
|
5
|
use Cwd qw(getcwd); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
17
|
1
|
|
|
1
|
|
4
|
use Storable qw(nstore retrieve); |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
38
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
53
|
use DataCube; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use DataCube::Schema; |
|
21
|
|
|
|
|
|
|
use DataCube::MeasureUpdater; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Constructor.
#
# Takes a flat list of key/value options and stores them as the attributes of
# the new object. May be called on the class name or on an existing instance;
# in the latter case the new object is blessed into that instance's class.
#
# Returns the new blessed hash reference.
sub new {
    my $invocant = shift;
    my %attrs    = @_;

    # ref() yields the class name for an object and '' for a plain class name.
    my $package = ref($invocant) || $invocant;

    return bless { %attrs }, $package;
}
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Splits a Storable-serialized data cube file into a directory tree of smaller
# Storable files.
#
# Call forms:
#   $splitter->split($path)
#   $splitter->split(file => $path, prefix => $n)
#
# Options:
#   file   - path of the stored data cube (required).
#   prefix - number of leading hex characters of each key's MD5 digest used to
#            pick the bucket file that key is written to (default 2).
#
# Layout produced, where <base> is the file path with a trailing 1-4 character
# extension removed:
#   <base>/<name_digest>/.schema    the cube's schema
#   <base>/<name_digest>/<bucket>   hash of the cube entries in that bucket
#
# Dies when no file is given, the path has no file-name component, the cube
# cannot be retrieved, a directory cannot be created (mkdir also fails when
# the directory already exists), or a part cannot be written.
#
# Returns $self.
sub split {
    my ($self, @args) = @_;

    # One argument is the file path itself; an even-sized list is key/value
    # options. Any other shape leaves %opts empty and is rejected below.
    my %opts;
    if (@args == 1) {
        $opts{file} = $args[0];
    }
    elsif (@args && @args % 2 == 0) {
        %opts = @args;
    }

    my $path = $opts{file};
    my $pref = $opts{prefix} || 2;

    die "DataCube::FileSplitter(split):\nno file given to split\n"
        unless defined $path && length $path;

    # Take the captures straight from the match and check that it succeeded:
    # after a failed match $1/$2 would still hold values from some earlier,
    # unrelated match and the output directory would be built from those.
    my ($dir, $file) = $path =~ /^((?:.*?[\/\\])?)([^\/\\]+?)$/
        or die "DataCube::FileSplitter(split):\ncant find a file name in path:\n$path\n";

    # Drop a trailing extension of one to four characters.
    $file =~ s/\..{1,4}$//i;

    mkdir($dir.$file)
        or die "DataCube::FileSplitter(split):\ncant make directory:\n$dir$file\nfrom path:$path\n$!\n";

    my $digester  = Digest::MD5->new;
    my $data_cube = Storable::retrieve($path);

    die "DataCube::FileSplitter(split):\ncant retrieve data cube from:\n$path\n$!\n"
        unless ref $data_cube;

    my $cube_store = $data_cube->{cube_store};
    my $cubes      = $cube_store->cubes;

    for my $cube_name (keys %$cubes) {
        my $cube_data = $cube_store->fetch($cube_name);
        my $name_dige = $cube_data->{schema}->{name_digest};
        my $cube_hash = $cube_data->{cube};
        my $cube_targ = $dir.$file.'/'.$name_dige;

        mkdir($cube_targ)
            or die "DataCube::FileSplitter(split):\ncant make directory:\n".
                   "$cube_targ\nfrom cube named\n$cube_name\n$name_dige\n$!\n";

        nstore($cube_data->{schema}, $cube_targ."/.schema")
            or die "DataCube::FileSplitter(split):\ncant write schema:\n$cube_targ/.schema\n$!\n";

        # Group the cube's keys by the leading characters of their MD5 digest.
        # hexdigest() resets the digester, so one object serves every key.
        my %keys_for;
        for my $key (keys %$cube_hash) {
            my $digest = $digester->add($key)->hexdigest;
            push @{ $keys_for{ substr($digest, 0, $pref) } }, $key;
        }

        # One Storable file per bucket, named after the digest prefix.
        for my $prefix (keys %keys_for) {
            my @cube_keys = @{ $keys_for{$prefix} };
            my %cube_hunk;
            @cube_hunk{@cube_keys} = @{$cube_hash}{@cube_keys};

            nstore(\%cube_hunk, $cube_targ . "/$prefix")
                or die "DataCube::FileSplitter(split):\ncant write cube part:\n$cube_targ/$prefix\n$!\n";
        }
    }

    return $self;
}
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Ensures that the directory "$dir/merge" exists.
#
# The entries of $dir other than "merge" are listed through $self->dir, which
# dies when $dir cannot be opened; the listing itself is not used any further
# by this sub.
#
# Dies when the merge directory is missing and cannot be created.
sub merge_all {
    my $self   = shift;
    my ($dir)  = @_;

    my @entries = grep { !/^merge$/ } $self->dir($dir);

    my $merge_dir = "$dir/merge";

    -d $merge_dir
        or mkdir($merge_dir)
        or die "DataCube::FileSplitter(merge_all):\ncant make directory:$merge_dir\n$!\n";
}
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Merges several split-cube directories into one target directory.
#
# Options:
#   target       - directory that receives the merged cubes; created when
#                  missing.
#   source_files - array reference of source directories. Each one holds one
#                  sub-directory per cube, containing a ".schema" file and the
#                  cube's part files.
#
# Every cube directory with a hex name found in the first source must exist in
# every source; otherwise the merge dies before anything is merged. The schema
# of a cube is read from the first source in which that cube is seen and is
# used to build the DataCube::MeasureUpdater that combines its parts.
#
# Part files with the same name across sources are combined via merge_files
# into "<target>/<cube>/<part>".
#
# Dies on invalid arguments, a failed base check, or a directory that cannot
# be created or read. Returns $self.
sub merge {
    my ($self, %opts) = @_;

    my $target       = $opts{target};
    my $source_files = $opts{source_files};

    # Validate before touching the file system so a bad call leaves no
    # half-created target directory behind.
    die "DataCube::FileSplitter(merge):\nno target directory given\n"
        unless defined $target && length $target;
    die "DataCube::FileSplitter(merge):\nsource_files must be a non-empty array reference\n"
        unless ref($source_files) && eval { @$source_files > 0 };

    unless (-d $target) {
        mkdir($target)
            or die "DataCube::FileSplitter(merge : mkdir):\ncant make target directory:\n$target\n$!\n";
    }

    # Base check: each cube of the first source must be present everywhere.
    my @first = grep { /^[a-f0-9]+$/i } $self->dir($source_files->[0]);
    for my $name (@first) {
        for my $source (@$source_files) {
            die "DataCube::FileSplitter(merge : base_check):\nmissing cube named:\n$name\nin merge source:\n$source"
                unless -d "$source/$name";
        }
    }

    # cube name => { schema, updater, parts => { part name => [ paths ] } }
    my %sources;

    for my $dir (@$source_files) {

        # Only sub-directories can be cubes; stray plain files are skipped.
        my @cube_dirs = grep { -d "$dir/$_" } $self->dir($dir);

        for my $cube_dir (@cube_dirs) {

            # Set up schema and updater the first time a cube is seen, so a
            # cube that is absent from the first source still gets an updater.
            unless ($sources{$cube_dir}) {
                my $schema = Storable::retrieve("$dir/$cube_dir/.schema");
                $sources{$cube_dir} = {
                    schema  => $schema,
                    updater => DataCube::MeasureUpdater->new($schema),
                    parts   => {},
                };
            }

            my @data_files = grep { $_ ne '.schema' } $self->dir("$dir/$cube_dir");
            for my $prefix (@data_files) {
                push @{ $sources{$cube_dir}->{parts}->{$prefix} }, "$dir/$cube_dir/$prefix";
            }
        }
    }

    for my $cube_name (keys %sources) {
        unless (-d "$target/$cube_name") {
            mkdir("$target/$cube_name") or die
                "DataCube::FileSplitter(merge : mkdir):\ncant make target directory:\n$target/$cube_name\n$!\n";
        }

        # parts is always a hash reference here, even for a cube without any
        # part files, so an empty cube yields an empty directory.
        my $parts = $sources{$cube_name}->{parts};

        for my $prefix (sort keys %$parts) {
            $self->merge_files(
                files   => $parts->{$prefix},
                target  => $target . "/$cube_name/$prefix",
                updater => $sources{$cube_name}->{updater},
            );
        }
    }

    return $self;
}
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Combines several Storable hash files into a single Storable file.
#
# Options:
#   files   - array reference of paths to Storable files, each holding a hash.
#   target  - path of the merged output file.
#   updater - object whose update(target => ..., source => ..., source_key =>
#             ..., target_key => ...) method folds one source entry into the
#             accumulated hash.
#
# When the target file already exists it is read first, ahead of the listed
# files, so its contents are folded into the result before it is rewritten.
#
# The caller's files array is left untouched.
#
# Dies when a file does not yield a reference or the result cannot be written.
# Returns $self.
sub merge_files {
    my ($self, %opts) = @_;

    my $target  = $opts{target};
    my $updater = $opts{updater};

    # Work on a private copy: prepending the target below must not alter the
    # array the caller handed in.
    my @files = @{ $opts{files} || [] };

    unshift @files, $target if -f $target;

    my $big_hunk = {};

    for my $file (@files) {
        my $small_hunk = Storable::retrieve($file);

        unless (ref $small_hunk) {
            die "DataCube::FileSplitter(merge_files):\nStorable returned a non-ref\nfor file:\n$file\n$!";
        }

        for my $key (keys %$small_hunk) {
            $updater->update(
                target     => $big_hunk,
                source     => $small_hunk,
                source_key => $key,
                target_key => $key,
            );
        }
    }

    # nstore returns undef on an I/O failure; do not let that pass silently.
    Storable::nstore($big_hunk, $target)
        or die "DataCube::FileSplitter(merge_files):\ncant write merged file:\n$target\n$!\n";

    return $self;
}
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# Lists the entries of a directory.
#
# Entries whose names consist solely of dots (such as "." and "..") are left
# out; names that merely start with a dot, such as ".schema", are kept.
#
# Returns the list of entry names (in scalar context, their count).
# Dies when the directory cannot be opened.
sub dir {
    my $self = shift;
    my ($path) = @_;

    opendir(my $handle, $path)
        or die "DataCube::FileSplitter(dir):\ncant open directory:$path\n$!\n";

    # Keep every name that contains at least one character other than a dot.
    return grep { $_ =~ /[^\.]/ } readdir($handle);
}
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
1; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
__END__ |