File Coverage

blib/lib/Crypt/IDA/ShareFile.pm
Criterion Covered Total %
statement 320 499 64.1
branch 116 256 45.3
condition 39 117 33.3
subroutine 20 34 58.8
pod 1 8 12.5
total 496 914 54.2


line stmt bran cond sub pod time code
1             package Crypt::IDA::ShareFile;
2              
3 1     1   24648 use 5.008008;
  1         3  
  1         40  
4 1     1   6 use strict;
  1         2  
  1         46  
5 1     1   5 use warnings;
  1         2  
  1         35  
6              
7 1     1   4 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
  1         3  
  1         103  
8              
9 1     1   12 use Carp;
  1         2  
  1         107  
10 1     1   5 use Fcntl qw(:DEFAULT :seek);
  1         2  
  1         1042  
11 1     1   968 use Crypt::IDA qw(:all);
  1         3  
  1         11960  
12              
13             require Exporter;
14              
15             my @export_default = qw( sf_calculate_chunk_sizes
16             sf_split sf_combine);
17             my @export_extras = qw( sf_sprintf_filename );
18              
19             our @ISA = qw(Exporter);
20             our %EXPORT_TAGS = (
21             'all' => [ @export_extras, @export_default ],
22             'default' => [ @export_default ],
23             );
24             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
25             our @EXPORT = qw( );
26              
27             our $VERSION = '0.01';
28             our $classname="Crypt::IDA::ShareFile";
29              
30             sub sf_sprintf_filename {
31 10     10 0 25 my ($self,$class);
32 10 50 33     54 if ($_[0] eq $classname or ref($_[0]) eq $classname) {
33 0         0 $self=shift;
34 0         0 $class=ref($self);
35             } else {
36 10         16 $self=$classname;
37             }
38 10         14 my ($format,$filename,$chunk,$share)=@_;
39              
40 10         44 $format=~s/\%f/$filename/;
41 10         21 $format=~s/\%c/$chunk/;
42 10         29 $format=~s/\%s/$share/;
43              
44 10         28 return $format;
45             }
46              
47              
48             # I could eliminate the use of closures below, but it makes for a
49             # convenient wrapper for handling byte order issues, and also for
50             # implementing the "dry run" option to sf_write_ida_header later.
51             sub sf_mk_file_istream {
52 9     9 0 11 my ($self,$class);
53 9 50 33     46 if ($_[0] eq $classname or ref($_[0]) eq $classname) {
54 0         0 $self=shift;
55 0         0 $class=ref($self);
56             } else {
57 9         18 $self=$classname;
58             }
59 9         14 my ($filename,$default_bytes_per_read)=@_;
60 9         13 my ($fh,$eof)=(undef,0);
61              
62             # basic checking of args
63 9 50       27 $default_bytes_per_read=1 unless defined($default_bytes_per_read);
64 9 50 33     78 if (!defined($filename) or
      33        
      33        
65             $default_bytes_per_read <= 0 or $default_bytes_per_read > 4 or
66             int($default_bytes_per_read) != $default_bytes_per_read) {
67 0         0 return undef;
68             }
69 9 50       21 die "using istream with >32 bits would lead to precision errors\n"
70             if $default_bytes_per_read > 4;
71              
72             # try opening the file; use sysopen to match better with later sysreads
73 9 50       437 return undef unless sysopen $fh,$filename,O_RDONLY;
74              
75             # Use closure/callback technique to provide an iterator for this file
76             my $methods=
77             {
78 0     0   0 FILENAME => sub { $filename },
79 0     0   0 FH => sub { $fh },
80             READ => sub {
81             # This reads words from the file in network (big-endian) byte
82             # order, with zero padding in the least significant bytes. So,
83             # for example, if we are using 2-byte chunks and the file
84             # contains three bytes 0d fe 2d, then two reads on the file
85             # will return the values 0dfe and 2d00. Return values are
86             # integers or undef on eof.
87              
88 99     99   97 my ($override_bytes,$bytes_to_read);
89              
90 99 50       201 if ($override_bytes=shift) {
91 99         110 $bytes_to_read=$override_bytes;
92 99 50 33     390 die "Invalid bytes to read $bytes_to_read" if
93             int($bytes_to_read) != $bytes_to_read or $bytes_to_read <= 0;
94 99 50       184 die "using istream with >32 bits would lead to precision errors\n"
95             if $bytes_to_read > 4;
96             } else {
97 0         0 $bytes_to_read=$default_bytes_per_read;
98             }
99              
100 99         109 my $buf="";
101 99         530 my $bytes_read=sysread $fh, $buf, $bytes_to_read;
102              
103             # There are three possible return value from sysread:
104             # undef there was a problem with the read (caller should check $!)
105             # 0 no bytes read (eof)
106             # >0 some bytes read (maybe fewer than we wanted, due to eof)
107 99 50       186 return undef unless defined($bytes_read);
108 99 50       171 if ($bytes_read == 0) {
109 0         0 $eof=1;
110 0         0 return undef;
111             }
112              
113             # we don't need to set eof, but it might be useful for callers
114 99 50       153 $eof=1 if ($bytes_read < $bytes_to_read);
115              
116             # Convert these bytes into a number (first byte is high byte )
117 99         240 $buf=pack "a$bytes_to_read", $buf; # pad zeroes on right
118             #$buf.="\0" x ($bytes_to_read - length $buf);
119              
120             # hex() can only handle values up to 32 bits, but perl scalars
121             # can handle up to 64 bits (they're upgraded to floats
122             # internally after 32 bits, though). I'm disabling this since I
123             # don't think it's acceptable. The only upshot for the rest of
124             # the program is that file size is now limited to 4Gb - 1 byte.
125 99         99 my $val=0;
126             # while ($bytes_to_read > 4) {
127             # $val=unpack "N", (substr $buf,0,4,"");
128             # $bytes_to_read-=4;
129             # $val <<= 32;
130             # }
131 99         144 my $hex_format="H" . ($bytes_to_read * 2); # count nibbles
132 99         352 return hex unpack $hex_format, $buf;
133              
134             },
135 0     0   0 EOF => sub { return $eof; },
136 0     0   0 SEEK => sub { seek $fh, shift, 0; },
137 0     0   0 TELL => sub { tell $fh; },
138 0     0   0 CLOSE => sub { close $fh; }
139 9         149 };
140 9         26 return $methods;
141             }
142              
143             sub sf_mk_file_ostream {
144 9     9 0 14 my ($filename,$default_bytes_per_write)=@_;
145 9         17 my ($fh,$eof)=(undef,0);
146              
147             # basic checking of args
148 9 50 33     57 if (!defined($filename) or !defined($default_bytes_per_write) or
      33        
149             $default_bytes_per_write <= 0) {
150 0         0 return undef;
151             }
152              
153             # try opening the file; use sysopen to match later sysreads
154 9 50       577 return undef unless sysopen $fh,$filename,O_CREAT|O_TRUNC|O_WRONLY;
155              
156             my $methods=
157             {
158 0     0   0 FILENAME => sub { $filename },
159 9     9   34 FH => sub { $fh },
160             WRITE => sub {
161 99     99   193 my $num=shift;
162              
163 99         88 my ($override_bytes,$bytes_to_write);
164              
165 99 50       152 if ($override_bytes=shift) {
166 99         125 $bytes_to_write=$override_bytes;
167             } else {
168 0         0 $bytes_to_write=$default_bytes_per_write;
169             }
170              
171             # Writing is a little easier than reading, but we have to take
172             # care if the number passed is too large to fit in the
173             # requested number of bytes. If it's too large a warning will
174             # be emitted and we discard any extra *high* bits.
175              
176 99         120 my $buf="";
177              
178 99 50       200 if ($num >= 256 ** $bytes_to_write) {
179 0         0 carp "ostream: Number too large. Discarded high bits.";
180 0         0 $num %= (256 ** ($bytes_to_write) - 1);
181             }
182              
183 99         144 my $hex_format="H" . ($bytes_to_write * 2);
184 99         282 $buf=pack $hex_format, sprintf "%0*x", $bytes_to_write*2, $num;
185 99         5894 syswrite $fh,$buf,$bytes_to_write;
186              
187             },
188 0     0   0 EOF => sub { 0; },
189 0     0   0 FILENAME => sub { return $filename; },
190 0     0   0 FLUSH => sub { 0; },
191 0     0   0 SEEK => sub { seek $fh, shift, 0; },
192 0     0   0 TELL => sub { tell $fh; },
193 0     0   0 CLOSE => sub { close $fh; }
194 9         168 };
195 9         47 return $methods; # be explicit
196             }
197              
198             # Routines to read/write share file header
199             #
200             # header version 1
201             #
202             # bytes name value
203             # 2 magic marker for "Share File" format; "SF" = {5346}
204             # 1 version file format version = 1
205             # 1 options options bits (see below)
206             # 1-2 k,quorum quorum k-value (set both names on read)
207             # 1-2 s,security security level s-value (width in bytes)
208             # var chunk_start absolute offset of chunk in file
209             # var chunk_next absolute offset of next chunk in file
210             # var transform transform matrix row
211             #
212             # The options bits are as follows:
213             #
214             # Bit name Settings
215             # 0 opt_large_k Large (2-byte) k value?
216             # 1 opt_large_w Large (2-byte) s value?
217             # 2 opt_final Final chunk in file? (1=full file/final chunk)
218             # 3 opt_transform Is transform data included?
219             #
220             # Note that the chunk_next field is 1 greater than the actual offset
221             # of the chunk end. In other words, the chunk ranges from the byte
222             # starting at chunk_start up to, but not including the byte at
223             # chunk_next. That's why it's called chunk_next rather than chunk_end.
224             #
225             # More on this: it might seem that it's ok to refuse to split a
226             # zero-length file, but if we're using this for backups, it's not a
227             # good idea to fail just because we don't like zero-length
228             # files. Also, splitting a zero-length file might be useful in some
229             # cases, since we might be interested in just creating and storing a
230             # transform matrix for later use, or maybe generating test cases or
231             # debugging a matrix inverse routine.
232              
233             sub sf_read_ida_header {
234 9     9 0 13 my $istream=shift; # assume istream is at start of file
235 9         13 my $header_info={}; # values will be returned in this hash
236              
237             #warn "Reading header from istream " . ($istream->{FILENAME}->()) . "\n";
238              
239             # When calling this routine the caller can specify any
240             # previously-read values for k, s, and so on and have us check the
241             # values in the current header against these values for consistency.
242             # This implies that all the shares we're being presented for
243             # processing will combine to form a single chunk (or full file). If
244             # this is the first share header being read, the following may be
245             # undefined. We store any read values in the returned hash, so it's
246             # up to the caller to take them out and pass them back to us when
247             # reading the next header in the batch.
248 9         15 my ($k,$w,$start,$next,$hdr)=@_;
249              
250 9         10 my $header_size=0; # we also send this back in hash
251              
252             # error reporting
253 9         21 $header_info->{header_error}=0; # 0=no error, 1=failure
254 9         17 $header_info->{error_message}=""; # text of error
255              
256             # use a local subroutine to save the tedium of checking for eof and
257             # doing conversion of input. Updates variables from our local scope
258             # directly, so we don't need to pass them in or out. (Actually,
259             # technically speaking, this creates an anonymous closure and
260             # locally assigns a name to it for the current scope, but it's
261             # pretty much the same thing as a local subroutine)
262             local *read_some = sub {
263 99     99   212 my ($bytes,$field,$conversion)=@_;
264 99         107 my ($vec,$hex);
265 99 50       161 if ($vec=$istream->{READ}->($bytes), defined($vec)) {
266 99 100       180 if (defined ($conversion)) {
267 72 100       168 if ($conversion eq "hex") {
    50          
268 9         57 $vec=sprintf "%*x", $bytes, $vec;
269             } elsif ($conversion eq "dec") {
270 63         72 $vec=$vec; # istream already returns integers
271             } else {
272 0         0 die "Unknown format conversion (use undef, hex or dec)\n";
273             }
274             }
275 99         173 $header_info->{$field}=$vec;
276 99         111 $header_size+=$bytes;
277 99         271 return 1; # read some? got some.
278             } else {
279 0         0 $header_info->{error}++;
280 0         0 $header_info->{error_message}="Premature end of stream\n";
281 0         0 return 0; # read some? got none!
282             }
283 9         40 };
284              
285             # same idea for saving and reporting errors
286             local *header_error = sub {
287 0     0   0 $header_info->{error}++;
288 0         0 $header_info->{error_message}=shift;
289 0         0 return $header_info;
290 9         30 };
291              
292 9 50       23 return $header_info unless read_some(2,"magic","hex");
293 9 50       27 if ($header_info->{magic} ne "5346") {
294 0         0 return header_error("This doesn't look like a share file\n" .
295             "Magic is $header_info->{magic}\n");
296             }
297              
298 9 50       22 return $header_info unless read_some(1,"version","dec");
299 9 50       25 if ($header_info->{version} != 1) {
300 0         0 return header_error("Don't know how to handle header version " .
301             $header_info->{version} . "\n");
302             }
303              
304             # read options field and split out into separate names for each bit
305 9 50       17 return $header_info unless read_some(1,"options","dec");
306 9         22 $header_info->{opt_large_k} = ($header_info->{options} & 1);
307 9         19 $header_info->{opt_large_w} = ($header_info->{options} & 2) >> 1;
308 9         28 $header_info->{opt_final} = ($header_info->{options} & 4) >> 2;
309 9         16 $header_info->{opt_transform} = ($header_info->{options} & 8) >> 3;
310              
311             # read k (regular or large variety) and check for consistency
312 9 50       27 return $header_info unless
    50          
313             read_some($header_info->{opt_large_k} ? 2 : 1 ,"k","dec");
314 9 50 66     42 if (defined($k) and $k != $header_info->{k}) {
315 0         0 return header_error("Inconsistent quorum value read from streams\n");
316             } else {
317 9         19 $header_info->{quorum} = $header_info->{k};
318             }
319              
320             # read s (regular or large variety) and check for consistency
321 9 50       22 return $header_info unless
    50          
322             read_some($header_info->{opt_large_w} ? 2 : 1 ,"w","dec");
323 9 50 66     45 if (defined($w) and $w != $header_info->{w}) {
324             return
325 0         0 header_error("Inconsistent security values read from streams\n");
326             } else {
327 9         19 $header_info->{security} = $header_info->{w};
328             }
329              
330             # File offsets can be of variable width, so we precede each offset
331             # with a length field. For an offset of 0, we only have to store a
332             # single byte of zero (since it takes zero bytes to store the value
333             # zero). So while storing the start offset for a complete file is a
334             # little bit wasteful (1 extra byte) compared to just using an
335             # options bit to indicate that the share is a share for a complete
336             # file and just storing the file length, it helps us to keep the
337             # code simpler and less prone to errors by not having to treat full
338             # files any differently than chunks.
339              
340             # Read in the chunk_start value. We'll re-use the offset_width key
341             # for the chunk_next code, and then delete that key before pasing the
342             # hash back to the caller (provided we don't run into errors).
343 9 50       21 return $header_info unless read_some(1 ,"offset_width","dec");
344 9         16 my $offset_width=$header_info->{offset_width};
345              
346             # Perl has no problem working with values as big as 2 ** 41 == 2Tb, but
347             # we should probably impose a sane limit on file sizes here.
348 9 50       22 if ($offset_width > 4) {
349 0         0 return header_error("File size must be less than 4Gb!\n");
350             }
351              
352             # now read in chunk_start and check that that it is a multiple of k * w
353 9         18 my $colsize=$header_info->{k} * $header_info->{w};
354 9 50       14 if ($offset_width) {
355 0 0       0 return $header_info unless
356             read_some($offset_width ,"chunk_start","dec");
357 0 0       0 if ($header_info->{chunk_start} % ($colsize)) {
358 0         0 return header_error("Alignment error on chunk start offset\n");
359             }
360             } else {
361 9         16 $header_info->{chunk_start}=0;
362             }
363              
364             # and also that it's consistent with other shares
365 9 50 66     34 if (defined($start) and $start != $header_info->{chunk_start}) {
366 0         0 return header_error("Inconsistent chunk_start values read from streams\n");
367             } else {
368 9         11 $start=$header_info->{chunk_start};
369             }
370              
371             # now get in the offset of the end of the chunk
372             # Note that chunk_next must be a multiple of k * s
373 9 50       24 return $header_info unless read_some(1 ,"offset_width","dec");
374 9         13 $offset_width=$header_info->{offset_width};
375 9 50       20 if ($offset_width > 4) {
376 0         0 return header_error("File size must be less than 4Gb!\n");
377             }
378 9 50       15 if ($offset_width) {
379 9 50       16 return $header_info unless
380             read_some($offset_width, "chunk_next","dec");
381 9 50 33     36 if (!$header_info->{opt_final} and
382             ($header_info->{chunk_next}) % ($colsize)) {
383 0         0 return header_error("Alignment error on non-final chunk end offset\n");
384             }
385             } else {
386             # header end of 0 is strange, but we'll allow it for now and only
387             # raise an error later if chunk_next <= chunk_start. Test code
388             # should make sure that the program works correctly when
389             # splitting/combining zero-length files.
390 0         0 $header_info->{chunk_next}=0;
391             }
392 9 50 66     36 if (defined($next) and $next != $header_info->{chunk_next}) {
393 0         0 return header_error("Inconsistent chunk_next values read from streams\n");
394             } else {
395 9         13 $next=$header_info->{chunk_next};
396             }
397 9         17 delete $header_info->{offset_width}; # caller doesn't need or want this
398              
399             # don't allow chunk_start > chunk_next, but allow chunk_start ==
400             # chunk_next to represent an empty file
401 9 50       27 if ($header_info->{chunk_start} > $header_info->{chunk_next}) {
402 0         0 return header_error("Invalid chunk range: chunk_start > chunk_next\n");
403             }
404              
405             # If transform data is included in the header, then read in a matrix
406             # row of $k values of $s bytes apiece
407 9 50       17 if ($header_info->{opt_transform}) {
408 9         16 my $matrix_row=[];
409 9         26 for my $i (1 .. $header_info->{k}) {
410 27 50       58 return $header_info unless read_some($header_info->{w},"element");
411 27         69 push @$matrix_row, $header_info->{element};
412             }
413 9         14 delete $header_info->{element};
414 9         19 $header_info->{transform}=$matrix_row;
415             #warn "Read transform row: [" . (join ", ", map
416             # {sprintf("%02x",$_) } @{
417             # $header_info->{transform}
418             # }) . "]\n";
419             }
420              
421             # Now that we've read in all the header bytes, check that header
422             # size is consistent with expectations.
423 9 50 66     34 if (defined($hdr) and $hdr != $header_size) {
424 0         0 return header_error("Inconsistent header sizes read from streams\n");
425             } else {
426 9         16 $header_info->{header_size}=$header_size;
427             }
428              
429 9         813 return $header_info;
430             }
431              
432             # When writing the header, we return number of header bytes written or
433             # zero in the event of some error.
434             sub sf_write_ida_header {
435 12     12 0 242 my ($self,$class);
436 12 50 33     63 if ($_[0] eq $classname or ref($_[0]) eq $classname) {
437 0         0 $self=shift;
438 0         0 $class=ref($self);
439             } else {
440 12         20 $self=$classname;
441             }
442 12         131 my %header_info=(
443             ostream => undef,
444             version => undef,
445             quorum => undef,
446             width => undef,
447             chunk_start => undef,
448             chunk_next => undef,
449             transform => undef,
450             opt_final => undef,
451             dry_run => 0,
452             @_
453             );
454 12         18 my $header_size=0;
455              
456             # save to local variables
457 108 50       212 my ($ostream,$version,$k,$s,$chunk_start,$chunk_next,
458             $transform,$opt_final,$dry_run) =
459             map {
460 12         19 exists($header_info{$_}) ? $header_info{$_} : undef
461             } qw(ostream version quorum width chunk_start chunk_next transform
462             opt_final dry_run);
463              
464 12 50 33     55 return 0 unless defined($version) and $version == 1;
465 12 50 33     96 return 0 unless defined($k) and defined($s) and
      33        
      33        
466             defined($chunk_start) and defined($chunk_next);
467              
468 12 50 33     53 return 0 if defined($transform) and scalar(@$transform) != $k;
469              
470 12 100       24 if ($dry_run) {
471             $ostream={
472 33     33   46 WRITE => sub { "do nothing" },
473 3         19 };
474             }
475              
476             # magic
477 12         31 $ostream->{WRITE}->(0x5346,2);
478 12         17 $header_size += 2;
479              
480             # version
481 12         30 $ostream->{WRITE}->($version,1);
482 12         13 $header_size += 1;
483              
484             # Set up and write options byte
485 12         15 my ($opt_large_k,$opt_large_w,$opt_transform);
486              
487 12 50       23 if ($k < 256) {
    0          
488 12         13 $opt_large_k=0;
489             } elsif ($k < 65536) {
490 0         0 $opt_large_k=1;
491             } else {
492 0         0 return 0;
493             }
494              
495 12 50       15 if ($s < 256) {
    0          
496 12         22 $opt_large_w=0;
497             } elsif ($s < 65536) {
498 0         0 $opt_large_w=1;
499             } else {
500 0         0 return 0;
501             }
502              
503 12 50       22 $opt_transform=(defined($transform) ? 1 : 0);
504              
505 12         52 $ostream->{WRITE}->((
506             ($opt_large_k) |
507             ($opt_large_w) << 1 |
508             ($opt_final) << 2 |
509             ($opt_transform) << 3),
510             1);
511 12         15 $header_size += 1;
512              
513             # write k and s values
514 12         32 $ostream->{WRITE}->($k, $opt_large_k + 1);
515 12         17 $header_size += $opt_large_k + 1;
516              
517 12         30 $ostream->{WRITE}->($s, $opt_large_w + 1);
518 12         16 $header_size += $opt_large_w + 1;
519              
520             # chunk_start, chunk_next
521 12         18 my ($width,$topval);
522              
523 12 50       33 if ($chunk_start == 0) {
524 12         27 $ostream->{WRITE}->(0,1);
525 12         18 $header_size += 1;
526             } else {
527 0         0 ($width,$topval)=(1,255);
528 0         0 while ($chunk_start > $topval) { # need another byte?
529 0         0 ++$width; $topval = ($topval << 8) + 255;
  0         0  
530             };
531 0         0 $ostream->{WRITE}->($width,1);
532 0         0 $ostream->{WRITE}->($chunk_start, $width);
533 0         0 $header_size += 1 + $width;
534             }
535              
536 12 50       24 if ($chunk_next == 0) {
537 0         0 $ostream->{WRITE}->(0,1);
538 0         0 $header_size += 1;
539             } else {
540 12         24 ($width,$topval)=(1,255);
541 12         28 while ($chunk_next > $topval) { # need another byte?
542 0         0 ++$width; $topval = ($topval << 8) + 255;
  0         0  
543             };
544 12         26 $ostream->{WRITE}->($width,1);
545 12         26 $ostream->{WRITE}->($chunk_next,$width);
546 12         17 $header_size += 1 + $width;
547             }
548              
549 12 50       33 if ($opt_transform) {
550 12         21 foreach my $elem (@$transform) {
551 36         66 $ostream->{WRITE}->($elem,$s);
552 36         68 $header_size += $s;
553             }
554             }
555              
556 12         79 return $header_size;
557             }
558              
559             # The following routine is exportable, since the caller may wish to
560             # know how large chunks are going to be before actually generating
561             # them. This could be useful, for example, if the caller needs to know
562             # how large the chunks are before deciding where to put them, or for
563             # trying out a different chunk size/strategy if the first one didn't
564             # suit their requirements.
565              
566             sub sf_calculate_chunk_sizes {
567 3     3 0 5 my ($self,$class);
568 0         0 my %o;
569              
570             # despite the routine name, we'll calculate several different values
571             # relating to each chunk:
572             # chunk_start
573             # chunk_next
574             # chunk_size (chunk_next - chunk_start)
575             # file_size (output file size, including header)
576             # opt_final (is the last chunk in the file?)
577             # padding (number of padding bytes in (final) chunk)
578             #
579             # We store these in a hash, and return a list of references to
580             # hashes, one for each chunk.
581              
582 3 50 33     21 if ($_[0] eq $classname or ref($_[0]) eq $classname) {
583 0         0 $self=shift;
584 0         0 $class=ref($self);
585             } else {
586 3         6 $self=$classname;
587             }
588 3         38 %o=(
589             quorum => undef,
590             width => undef,
591             filename => undef,
592             # misc options
593             version => 1, # header version
594             save_transform => 1, # whether to store transform in header
595             # pick one method of calculating chunk size. The file is not
596             # broken into chunks unless one of these is defined.
597             n_chunks => undef,
598             in_chunk_size => undef,
599             out_chunk_size => undef,
600             out_file_size => undef,
601             @_,
602             dry_run => 1, # for call to sf_write_ida_header
603             );
604 3         5 my @chunks=();
605 3         5 my ($hs,$cb,$cn,$cs,$nc);
606              
607             # Copy options into local variables
608 27 50       159 my ($k, $w, $filename, $version, $save_transform,
609             $n_chunks, $in_chunk_size, $out_chunk_size, $out_file_size) =
610             map {
611 3         7 exists($o{$_}) ? $o{$_} : undef
612             } qw(quorum width filename version save_transform
613             n_chunks in_chunk_size out_chunk_size out_file_size);
614              
615             # Check some input values (more checks later)
616 3 0 33     22 unless ($w == 1 or $w == 2 or $w == 4) {
      33        
617 0         0 carp "Invalid width value";
618 0         0 return undef;
619             }
620 3 50 33     37 if ($k < 1 or $k >= 256 ** $w) {
621 0         0 carp "quorum value out of range";
622 0         0 return undef;
623             }
624             # leave version check until call to sf_write_ida_header
625              
626             # In all cases, we'll try to make all non-final chunks align to
627             # $quorum x $width bytes. Whichever method is used, we need to know
628             # what the total file size with/without padding will be.
629 3         54 my $file_size=-s $filename;
630 3 50       8 unless (defined($file_size)) {
631 0         0 return undef;
632             }
633 3         3 my $padded_file_size=$file_size;
634 3         14 while ($padded_file_size % ($k * $w)) {
635 0         0 ++$padded_file_size; # not very efficient, but it is easy
636             }
637              
638             # We'll pass %o onto sf_write_ida_header later, so we need a dummy
639             # value for transform if "save_transform" is set.
640 3 50 33     16 if (defined($save_transform) and $save_transform) {
641             #warn "making dummy transform array\n";
642 3         17 $o{"transform"} = [ (0) x ($k * $w) ];
643             } else {
644             #warn "save_transform not defined\n";
645 0         0 $o{"transform"} = undef;
646             }
647              
648             # Check that no more than one chunking method is set
649 3         5 my $defined_methods=0;
650 3 50       11 ++$defined_methods if (defined($n_chunks));
651 3 50       10 ++$defined_methods if (defined($in_chunk_size));
652 3 50       8 ++$defined_methods if (defined($out_chunk_size));
653 3 50       9 ++$defined_methods if (defined($out_file_size));
654              
655 3 50 33     20 if ($defined_methods > 1) {
    50          
656 0         0 carp "please select at most one method of calculating chunk sizes";
657 0         0 return undef;
658             } elsif ($file_size == 0 or $defined_methods == 0) {
659             # we can also handle the special case where $file_size == 0 here
660 3 50       7 unless ($file_size) {
661 0         0 carp "warning: zero-sized file $filename; will use single chunk";
662             }
663 3         17 ($cb,$cn,$cs)=(0,$file_size,$file_size);
664 3         8 $o{"chunk_start"} = $cb;
665 3         6 $o{"chunk_next"} = $cn;
666 3         25 $hs=sf_write_ida_header(%o);
667 3 50 33     34 unless (defined ($hs) and $hs > 0) {
668 0         0 carp "Something wrong with header options.";
669 0         0 return undef;
670             }
671             #warn "Single chunk\n";
672             return ( {
673 3         39 "chunk_start" => $cb,
674             "chunk_next" => $cn,
675             "chunk_size" => $cs,
676             "file_size" => $hs + $cs,
677             "opt_final" => 1,
678             "padding" => $padded_file_size - $file_size,
679             } );
680             }
681              
682             # on to the various multi-chunk methods ...
683 0 0 0     0 if (defined($n_chunks)) {
    0          
    0          
684 0 0       0 unless ($n_chunks > 0) {
685 0         0 carp "Number of chunks must be greater than zero!";
686 0         0 return undef;
687             }
688 0         0 my $max_n_chunks=$padded_file_size / ($k * $w);
689 0 0       0 if ( $n_chunks > $max_n_chunks ) {
690 0         0 carp "File is too small for n_chunks=$n_chunks; using " .
691             "$max_n_chunks instead";
692 0         0 $n_chunks=$max_n_chunks;
693             }
694             # creating chunks of exactly the same size may not be possible
695             # since we have to round to matrix column size. Rounding down
696             # means we'll end up with a larger chunk at the end, while
697             # rounding up means we might produce some zero-sized chunks at the
698             # end. The former option is most likely the Right Thing. Even
699             # though it might be nice to make the first chunk bigger, it's
700             # easier to code if we let the last chunk take up any excess. To
701             # do this we can round the chunk size up to the nearest multiple
702             # of $n_chunks first, then round down to the nearest column
703             # size. We should end up with a non-zero value since we've imposed
704             # a limit on the maximum size of $n_chunks above.
705 0         0 $cs = int(($padded_file_size + $n_chunks - 1) / $n_chunks);
706 0         0 $cs -= $cs % ($k * $w);
707 0 0       0 die "Got chunk size of zero with padded file_size $padded_file_size," .
708             " n_chunks=$n_chunks (this shouldn't happen)\n" unless $cs;
709 0         0 ($cb,$cn)=(0,$cs);
710 0         0 for my $i (0 .. $n_chunks - 2) { # all pre-final chunks
711 0         0 $o{"chunk_start"} = $cb;
712 0         0 $o{"chunk_next"} = $cn;
713 0         0 $hs=sf_write_ida_header(%o);
714 0 0 0     0 unless (defined ($hs) and $hs > 0) {
715 0         0 carp "Something wrong with header options for chunk $i.";
716 0         0 return undef;
717             }
718             #warn "Chunk $cb-$cn, size $cs, fs=$hs + $cs, final=0\n";
719 0         0 push @chunks, {
720             "chunk_start" => $cb,
721             "chunk_next" => $cn,
722             "chunk_size" => $cs,
723             "file_size" => $hs + $cs,
724             "opt_final" => 0,
725             "padding" => 0,
726             };
727 0         0 $cb += $cs;
728 0         0 $cn += $cs;
729             }
730             # final chunk; need to do this separately since we need to pass
731             # correct values for chunk range to accurately calculate the
732             # header size (ie, a rough figure won't do if chunk_next is close
733             # to 256 ** width)
734 0         0 $o{"chunk_start"} = $cb;
735 0         0 $o{"chunk_next"} = $file_size; # without padding
736 0         0 $hs=sf_write_ida_header(%o);
737 0         0 push @chunks, {
738             "chunk_start" => $cb,
739             "chunk_next" => $file_size,
740             "chunk_size" => $file_size - $cb,
741             "file_size" => $hs + $padded_file_size,
742             "opt_final" => 1,
743             "padding" => $padded_file_size - $file_size,
744             };
745             #warn "Last chunk: $cb-$padded_file_size, size ".
746             # ($padded_file_size - $cb) . ", fs=$hs + $padded_file_size - $cb, ".
747             # "final=1\n";
748              
749 0 0       0 die "last chunk starts beyond eof (this shouldn't happen)\n" if
750             ($cb >= $padded_file_size);
751             # ... and return the array
752 0         0 return @chunks;
753             } elsif (defined($in_chunk_size) or defined($out_chunk_size)) {
754             # this can actually be rolled into the above n_chunks method
755 0         0 carp "not implemented yet";
756 0         0 return undef;
757             } elsif (defined($out_chunk_size)) {
758 0         0 carp "not implemented yet";
759 0         0 return undef;
760             } else {
761 0         0 1;
762             #die "problem deciding chunking method (shouldn't get here)\n";
763             }
764             }
765              
# sf_split: split an input file into n share files using Rabin's IDA.
#
# Callable as a function, class method or instance method (the guard at
# the top strips a leading $classname/object argument). Options are
# passed as a flat key => value list; most are forwarded unchanged to
# ida_split() and sf_write_ida_header(), so the option names here
# deliberately overlap with those routines' names.
#
# Returns a list with one entry per chunk processed, each entry being
# [ $key, $matrix, $bytes_written, @share_filenames ]; returns undef on
# any error (after carp-ing a diagnostic).
sub sf_split {
  my ($self,$class);
  # Accept both Crypt::IDA::ShareFile->sf_split(...) and sf_split(...).
  if ($_[0] eq $classname or ref($_[0]) eq $classname) {
    $self=shift;
    $class=ref($self);
  } else {
    $self=$classname;
  }
  my %o=(
	 # We'll be passing this hash on directly to ida_split later on
	 # so option names here will overlap with the option names needed
	 # by that routine. The same applies to option names in
	 # sf_write_ida_header.
	 shares => undef,
	 quorum => undef,
	 width => 1,
	 filename => undef,
	 # supply a key, a matrix or neither
	 key => undef,
	 matrix => undef,
	 # misc options
	 version => 1,		# header version
	 rand => "/dev/urandom",
	 bufsize => 4096,
	 save_transform => 1,
	 # pick at most one chunking method. The file is not broken into
	 # chunks unless one of these is defined.
	 n_chunks => undef,
	 in_chunk_size => undef,
	 out_chunk_size => undef,
	 out_file_size => undef,
	 # allow creation of a subset of shares, chunks
	 sharelist => undef,	# [ $row1, $row2, ... ]
	 chunklist => undef,	# [ $chunk1, $chunk2, ... ]
	 # specify pattern to use for share filenames
	 filespec => undef,	# default value set later on
	 @_,
	 # The file format uses network (big-endian) byte order, so store
	 # this info after all the user-supplied options have been read
	 # in (placing these after @_ means the caller cannot override
	 # them).
	 inorder => 2,
	 outorder => 2,
	 opt_final => 0,
	);

  my (@chunks, @results);

  # Copy options into local variables. Note the mapping: the 'shares'
  # option becomes $n and the 'quorum' option becomes $k.
  my ($n, $k, $w, $filename,
      $key, $mat, $version,
      $rng, $bufsize,
      $save_transform,
      $n_chunks, $in_chunk_size, $out_chunk_size, $out_file_size,
      $sharelist, $chunklist,$filespec
     ) =
       map {
	 exists($o{$_}) ? $o{$_} : undef
       } qw(
	    shares quorum width filename
	    key matrix version
	    rand bufsize save_transform
	    n_chunks in_chunk_size out_chunk_size out_file_size
	    sharelist chunklist filespec);


  # Pass all options to sf_calculate_chunk_sizes and let it figure out
  # all the details for each chunk.
  @chunks=sf_calculate_chunk_sizes(%o);
  unless (defined($chunks[0])) {
    carp "Problem calculating chunk sizes from given options";
    return undef;
  }

  # Now that we know how many chunks there are, we can check that the
  # filespec mentions "%c" for the chunk number. The "%s" specifier is
  # also always required. Also, we can set up different default
  # filespecs for single-chunk and multi-chunk splits.
  if (defined($filespec)) {
    unless ($filespec =~ /\%s/) {
      carp "filespec must include \%s for share number";
      return undef;
    }
    unless (scalar (@chunks) == 1 or $filespec =~ /\%c/) {
      carp "filespec must include \%c for multi-chunk splits";
      return undef;
    }
  } else {
    $filespec=(scalar (@chunks) == 1) ? '%f-%s.sf' : '%f-%c-%s.sf';
  }

  # check the sharelist and chunklist arrays to weed out dups and
  # invalid share/chunk numbers. If we weren't passed a value for one
  # or the other, then we'll default to processing all shares/all
  # chunks.
  # NOTE(review): ida_check_list's return value is ignored here; it
  # appears to sanitise the list in place — confirm against Crypt::IDA.
  if (defined($sharelist)) {
    ida_check_list($sharelist,"share",0,$n-1);
    unless (scalar(@$sharelist) == $n) {
      carp "sharelist does not contain n=$n share numbers; aborting";
      return undef;
    }
  } else {
    $sharelist=[ 0 .. $n - 1 ];
  }

  if (defined($chunklist)) {
    ida_check_list($chunklist,"chunk",0,scalar(@chunks)-1);
    unless (scalar(@$chunklist) > 0) {
      carp "chunklist does not contain any valid chunk numbers; aborting";
      return undef;
    }
  } else {
    $chunklist=[ 0 .. scalar(@chunks) - 1 ];
  }

  # Now loop through each chunk that we've been asked to create
  for my $i (@$chunklist) {

    my $chunk=$chunks[$i];
    my @sharefiles=();		# we return a list of files in each
				# chunk at the end of the routine.

    # Unpack chunk details into local variables. Not all these
    # variables are needed, but we might as well unpack them anyway.
    my ($chunk_start,$chunk_next,$chunk_size,$file_size,
	$opt_final,$padding) =
	  map { $chunk->{$_} }
	    qw (
		chunk_start chunk_next chunk_size file_size
		opt_final padding
	       );

    # We should only really need to open the input file once,
    # regardless of how many chunks/shares we're creating. But since
    # we're using Crypt::IDA's file reader, and it allows us to seek
    # to the start of the chunk when we create the callback, it's
    # easier to (re-)open and seek once per chunk.
    my $filler=fill_from_file($filename, $k * $w, $chunk_start);
    unless (defined($filler)) {
      carp "Failed to open input file: $!";
      return undef;
    }

    # Unfortunately, creating a new share isn't quite as simple as
    # calling ida_split with all our parameters. The job is
    # complicated by the fact that we need to store both the share
    # data and (usually) a row of the transform matrix. In the case
    # where a new transform matrix would be created by the call to
    # ida_split, then we would have to wait until it returned before
    # writing the transform rows for it to each share header. But that
    # would require that we write the header after the share, which
    # isn't a very nice solution. Also, we'd still have to calculate
    # the correct amount of space to allocate for the header before
    # setting up the empty handlers, which is also a bit messy.
    #
    # The simplest solution is to examine the key/matrix and
    # save_transform options we've been given and call the
    # ida_generate_key and/or ida_key_to_matrix routines ourselves, if
    # necessary. Then we will know which transform rows to save with
    # each share and we can pass our generated key/matrix directly on
    # to ida_split.

    if (ida_check_transform_opts(%o)) {
      carp "Can't proceed due to problem with transform options";
      return undef;
    }
    # A key/matrix generated on the first iteration is stashed in %o,
    # so later chunks reuse the same transform.
    unless (defined($mat)) {
      if (defined ($key)) {
	if (ida_check_key($k,$n,$w,$key)) {
	  carp "Problem with supplied key";
	  return undef;
	}
      } else {
	$rng=ida_rng_init($w,$rng);	# swap string for closure
	unless (defined($rng)) {
	  carp "Failed to initialise random number generator";
	  return undef;
	}
	$key=ida_generate_key($k,$n,$w,$rng);
      }

      # now generate matrix from key
      $mat=ida_key_to_matrix( "quorum" => $k,
			      "shares" => $n,
			      "width" => $w,
			      "sharelist" => $sharelist,
			      "key" => $key,
			      "skipchecks?" => 0);
      unless (defined($mat)) {
	carp "bad return value from ida_key_to_matrix";
	return undef;
      }
      $o{"matrix"}=$mat;	# stash new matrix
      $o{"key"}=undef;		# and undefine key (if any)
    }

    $o{"chunk_start"}= $chunk_start; # same values for all shares
    $o{"chunk_next"} = $chunk_next;  # in this chunk
    $o{"opt_final"}  = $opt_final;
    my $emptiers=[];
    for my $j (@$sharelist) {
      # For opening output files, we're responsible for writing the file
      # header, so we first make one of our ostreams, write the header,
      # then create a new empty_to_fh handler which will seek past the
      # header.
      my $sharename = sf_sprintf_filename($filespec, $filename, $i, $j);
      unlink $sharename;	# remove any existing file
      my $sharestream = sf_mk_file_ostream($sharename, $w);
      unless (defined($sharestream)) {
	carp "Failed to create share file (chunk $i, share $j): $!";
	return undef;
      }
      # Row $j of the transform matrix is stored in this share's header.
      my $hs=sf_write_ida_header(%o, ostream => $sharestream,
				 transform => [$mat->getvals($j,0,$k)]);
      unless (defined ($hs) and $hs > 0) {
	carp "Problem writing header for share (chunk $i, share $j)";
	return undef;
      }
      # Sanity check: header size + share body must equal the file size
      # predicted by sf_calculate_chunk_sizes.
      unless ($hs + $chunk_size == $file_size) {
	carp "file size mismatch ($i,$j) (this shouldn't happen)";
	carp "hs=$hs; chunk_size=$chunk_size; file_size=$file_size; pad=$padding";
	return undef;
      }
      my $emptier=empty_to_fh($sharestream->{"FH"}->(),$hs);
      push @$emptiers, $emptier;
      push @sharefiles, $sharename;
    }

    # Now that we've written the headers and set up the fill and empty
    # handlers, we only need to add details of the filler and
    # emptiers, then pass the entire options array on to ida_split to
    # create all shares for this chunk.
    $o{"filler"}   = $filler;
    $o{"emptiers"} = $emptiers;
    # NOTE(review): this "my" deliberately shadows the outer $key/$mat
    # for the remainder of the loop body; the outer copies (and
    # $o{"matrix"}) are what persist into the next iteration.
    my ($key,$mat,$bytes)=ida_split(%o);

    # check for success, then save the results
    unless (defined($mat)) {
      carp "detected failure in ida_split; quitting";
      return undef;
    }
    push @results, [$key,$mat,$bytes, @sharefiles];

    # Perl should handle closing file handles for us once they go out
    # of scope and they're destroyed.

  }

  return @results;

}
1017              
# sf_combine: reconstruct (one chunk of) the original file from a set of
# share files produced by sf_split.
#
# Combining files is complicated by two issues:
#
# * Given a list of files, we don't know anything about which files
#   are supposed to belong to which chunk, so responsibility for
#   grouping the files of a single chunk rests with the caller
#   (sf_split returns/embeds chunk numbers in filenames to help).
# * The file format allows for omission of the transform data, so we
#   have to support having a key or transform matrix passed to us
#   for each chunk (key requires shares + sharelist so each share can
#   be matched to its matrix row).
#
# This routine operates on ONE chunk at a time; call it once per chunk.
# Options mirror ida_combine, except that instead of fillers/emptier we
# accept 'infiles' (arrayref of share filenames) and 'outfile' (name).
#
# Returns the number of bytes written to the output file, or undef on
# error (after carp-ing a diagnostic). The output file is truncated to
# the true file length when the chunk is marked final (to strip the
# zero padding added at split time).
sub sf_combine {

  my ($self,$class);
  # Accept function, class-method and instance-method calling styles.
  if ($_[0] eq $classname or ref($_[0]) eq $classname) {
    $self=shift;
    $class=ref($self);
  } else {
    $self=$classname;
  }
  my %o=
    (
     # Options for source, sinks. These are the only required options.
     infiles => undef,		# [ $file1, $file2, ... ]
     outfile => undef,		# "filename"
     # If specified, the following must agree with the values stored
     # in the sharefiles. There's normally no need to set these.
     quorum => undef,
     width => undef,
     # If matrix is set, it must be a pre-inverted matrix, and it will
     # override any values read in from the file (along with emitting
     # a warning if both are found). Alternatively, if a key is
     # supplied, the 'shares' and 'sharelist' options must also be
     # given. A 'key' will also override any values stored in the file
     # and also emit a warning if both are found.
     key => undef,
     matrix => undef,
     shares => undef,		# only needed if key supplied
     sharelist => undef,	# only needed if key supplied
     # misc options
     bufsize => 4096,
     @_,
     # byte order options (can't be overriden)
     inorder => 2,
     outorder => 2,
     # no point in accepting a user-supplied value of $bytes, since we
     # determine this from the share headers
     bytes => undef,
    );

  # copy all options into local variables ('quorum' -> $k, 'shares' -> $n)
  my ($k,$n,$w,$key,$mat,$shares,$sharelist,$infiles,$outfile,
      $bufsize,$inorder,$outorder,$bytes) =
	map {
	  exists($o{$_}) ? $o{$_} : undef;
	} qw(quorum shares width key matrix shares sharelist
	     infiles outfile bufsize inorder outorder bytes);
  my $fillers=[];

  # Check options
  if (defined($key) and defined($mat)) {
    carp "Conflicting key/matrix options given.";
    return undef;
  }
  if (defined($key) and !(defined($shares) and defined($sharelist))) {
    carp "key option also requires shares and sharelist options.";
    return undef;
  }

  # Weed out any duplicate input file names. If duplicates are found,
  # and the sharelist option is given, then some of the share numbers
  # in that list will probably now be wrong. Rather than trying to fix
  # up the numbers, we'll take the easy way out and simply report it
  # as an error and return.
  # (Guard against a missing/undef infiles option: dereferencing undef
  # under strict refs would die rather than report cleanly.)
  unless (defined($infiles) and scalar(@$infiles)) {
    carp "No input files to process; aborting.";
    return undef;
  }
  my %saw_file;
  my $new_filelist=[];
  foreach my $infile (@$infiles) {
    if (exists($saw_file{$infile})) {
      # BUGFIX: this sharelist check previously lived in the else
      # (first-seen) branch, which made any call with a sharelist abort
      # on the very first file. It only applies when a duplicate has
      # actually been detected.
      if (defined ($sharelist)) {
	carp "Duplicate file invalidates supplied sharelist; aborting";
	return undef;
      }
      carp "Ignoring duplicate input file: $infile";
    } else {
      $saw_file{$infile} = 1;
      push @$new_filelist, $infile;
    }
  }
  $infiles=$new_filelist;

  # If k-value is given, only check it after we've de-duped the input
  # file list.
  if (defined($k) and scalar(@$infiles) < $k) {
    carp "For given quorum value $k, I need (at least) $k infiles";
    return undef;
  }

  # We won't build the transform matrix until later (or not at all if
  # we're supplied with a matrix or key option). We'll store the
  # values returned from sf_read_ida_header in a regular array and
  # then convert them into a Math::FastGF2::Matrix object when we come
  # to calculating the inverse matrix.
  my @matrix=();

  # Read in headers from each infile and create a new filler for each
  my ($nshares, $header_info, $header_size)=(0,undef,undef);
  my ($chunk_start,$chunk_next)=(undef,undef);
  foreach my $infile (@$infiles) {

    my $istream=sf_mk_file_istream($infile,1);
    unless (defined($istream)) {
      carp "Problem opening input file $infile: $!";
      return undef;
    }

    # It's fine for some of these values to be undefined the first
    # time around. However if they were specified as options and
    # the first header read in doesn't match, or if shares have
    # inconsistent values then read_ida_header will detect this.
    $header_info=sf_read_ida_header($istream,$k,$w,$chunk_start,
				    $chunk_next,$header_size);

    if ($header_info->{error}) {
      carp $header_info->{error_message};
      return undef;
    }

    # Store values to check for consistency across all shares
    $k           = $header_info->{k};
    $w           = $header_info->{w};
    $header_size = $header_info->{header_size};
    $chunk_start = $header_info->{chunk_start};
    $chunk_next  = $header_info->{chunk_next};

    if (++$nshares <= $k) {
      if ($header_info->{opt_transform}) {
	if (defined($mat)) {
	  carp "Ignoring file transform data (overriden by matrix option)";
	} elsif (defined($key)) {
	  carp "Ignoring file transform data (overriden by key option)";
	} else {
	  # collect this share's row of the transform matrix
	  push @matrix, $header_info->{transform};
	}
      } else {
	unless (defined ($mat) or defined($key)) {
	  carp "Share file contains no transform data and no " .
	    "key/matrix options were supplied.";
	  return undef;
	}
      }
    } else {
      # we only need k shares; surplus files are skipped entirely
      carp "Redundant share(s) detected and ignored";
      last;
    }

    # filler skips past the header to the start of the share body
    push @$fillers, fill_from_file($infile,$k * $w, $header_size);
  }

  # Now that the header has been read in and all the streams agree on
  # $k and $w, we proceed to build the inverse matrix unless we've
  # been supplied with a key or (pre-inverted) matrix.

  # first make sure we have k valid shares to combine
  unless ($nshares >= $k) {
    carp "Wrong number of shares to combine (have $nshares, want $k)";
    return undef;
  }

  unless (defined($key) or defined($mat)) {
    $mat=Math::FastGF2::Matrix->new(
				    rows => $k,
				    cols => $k,
				    width => $w,
				    org => "rowwise",
				   );
    # flatten the k collected transform rows into one value list
    my @vals=();
    map { push @vals, @$_ } @matrix;
    $mat->setvals(0,0, \@vals, $inorder);
    $mat=$mat->invert();
    unless (defined($mat)) {
      carp "Failed to invert matrix!";
      return undef;
    }
  }

  # Number of bytes to process: the chunk length, rounded up to a whole
  # number of matrix columns (k * w) for the final (padded) chunk.
  # ($header_info here is from the last header read; all headers have
  # been checked for consistency above.)
  $bytes=$chunk_next - $chunk_start;
  if ($bytes % ($k * $w)) {
    unless ($header_info->{"opt_final"}) {
      carp "Invalid: non-final share is not a multiple of quorum x width";
      return undef;
    }
    $bytes += (($k * $w) - $bytes % ($k * $w));
  }

  # we leave creating/opening the output file until relatively late
  # since we need to know what offset to seek to in it, and we only
  # know that when we've examined the sharefile headers
  my $emptier=empty_to_file($outfile,undef,$chunk_start);

  # Need to update %o before calling ida_combine
  $o{"emptier"} = $emptier;	# leave error-checking to ida_combine
  $o{"fillers"} = $fillers;
  $o{"matrix"}  = $mat unless (defined($key));
  $o{"quorum"}  = $k;
  $o{"width"}   = $w;
  $o{"bytes"}   = $bytes;

  my $output_bytes=ida_combine(%o);

  return undef unless defined($output_bytes);

  # Strip the padding: truncate the output to the real end-of-chunk
  # offset when this is the final chunk. (truncate accepts a filename.)
  if ($header_info->{opt_final}) {
    truncate $outfile, $header_info->{chunk_next};
  }

  return $output_bytes;
}
1305              
1306             1;
1307              
1308             __END__