File Coverage

blib/lib/File/Unpack.pm
Criterion Covered Total %
statement 319 1006 31.7
branch 123 712 17.2
condition 51 254 20.0
subroutine 42 63 66.6
pod 14 14 100.0
total 549 2049 26.7


line stmt bran cond sub pod time code
1             #
2             # (C) 2010-2015, jnw@cpan.org, all rights reserved.
3             # Distribute under the same license as Perl itself.
4             #
5             #
6             # sudo zypper -v in perl-Compress-Raw-Zlib
7             # -> 'nothing to do'
8             # sudo zypper -v in 'perl-Compress-Raw-Zlib >= 2.027'
9             # -> 'perl' providing 'perl-Compress-Raw-Zlib >= 2.027' is already installed.
10             # sudo zypper -v in --force perl-Compress-Raw-Zlib
11             # -> works,
12             # sudo zypper -v in --from 12 perl-Compress-Raw-Zlib
13             # -> works, if d.l.p is repo #12.
14             #
15             # TODO:
16             # * evaluate File::Extract - Extract Text From Arbitrary File Types
17             # (HTML, PDF, Plain, RTF, Excel)
18             #
19             # * make taint checks really check things, instead of $1 if m{^(.*)$};
20             #
21             # * Implement disk space monitoring.
22             #
23             # * formats:
24             # - use lzmadec/xzdec as fallback to lzcat.
25             # - glest has bzipped tar files named glest-1.0.10-data.tar.bz2.tar;
26             # - Not all suffixes are appended by . e.g. openh323-v1_15_2-src-tar.bz2 is different.
27             # - gzip -dc can unpack old compress .Z, add its mime-type
28             # - java-1_5_0-sun hides zip-files in shell scripts with suffix .bin
29             # - cpio fails on \.delta\.rpm
30             # - rpm files should extract all header info in readable format.
31             # - do we rely on rpm2cpio to handle them all:
32             # rpm -qp --nodigest --nosignature --qf "%{PAYLOADCOMPRESSOR}" $f
33             # - m{\.(otf|ttf|ps|eps)$}i
34             # - application/x-frame # xorg-modular/doc/xorg-docs/specs/XPRINT/xp_libraryTOC.doc
35             #
36             # * blacklisting?
37             # # th_en_US.dat is an 11MB thesaurus in OOo
38             # skip if $from =~ m{(/(ustar|pax)\-big\-\d+g\.tar\.bz2|/th_en_US\.dat|/testtar\.tar|\.html\.(ru|ja|ko\.euc-kr|fr|es|cz))$}
39             #
40             # * use LWP::Simple::getstore() if $archive =~ m{^\w+://}
41             # * application/x-debian-package is a 'application/x-archive' -> (ar xv /dev/stdin) < $qufrom";
42             # * application/x-iso9660 -> "isoinfo -d -i %(src)s"
43             # * PDF improvement: okular says: 'this document contains embedded files.' How can we grab those?
44              
45 9     9   163485 use warnings;
  9         16  
  9         389  
46 9     9   44 use strict;
  9         13  
  9         546  
47              
48             package File::Unpack;
49              
50             BEGIN
51             {
52             # Requires: shared-mime-info
53 9     9   498 eval 'use File::LibMagic;'; # only needed in mime(); mime() dies, if missing
  9     9   1864  
  0         0  
  0         0  
54 9     9   446 eval 'use File::MimeInfo::Magic;'; # only needed in mime(); okay, if missing.
  9         4629  
  9         64520  
  9         439  
55             # unless builtin!
56 9     9   560 eval 'use Compress::Raw::Lzma;'; # only needed in mime(); for finding lzma.
  9         1804  
  0         0  
  0         0  
57 9     9   439 eval 'use Compress::Raw::Bzip2;'; # only needed in mime(); for finding second level types
  9         8425  
  9         11852  
  9         1023  
58 9     9   529 eval 'use Compress::Raw::Zlib;'; # only needed in mime(); for finding second level types
  9         9744  
  9         41057  
  9         1907  
59 9     9   548 eval 'use BSD::Resource;'; # setrlimit
  9         5436  
  9         32564  
  9         42  
60 9     9   2214 eval 'use Filesys::Statvfs;'; # statvfs();
  9         4161  
  9         4338  
  9         527  
61             }
62              
63 9     9   46 use Carp;
  9         11  
  9         444  
64 9     9   45 use File::Path;
  9         10  
  9         493  
65 9     9   6949 use File::Temp (); # tempdir() in _run_mime_helper.
  9         178163  
  9         243  
66 9     9   4854 use File::Copy ();
  9         18004  
  9         241  
67 9     9   4559 use File::Compare ();
  9         7292  
  9         190  
68 9     9   5310 use JSON;
  9         81122  
  9         50  
69 9     9   5901 use String::ShellQuote; # used in _prep_configdir
  9         5796  
  9         568  
70 9     9   8080 use IPC::Run; # implements File::Unpack::run()
  9         272765  
  9         421  
71 9     9   4590 use Text::Sprintf::Named; # used to parse @builtin_mime_helpers
  9         8796  
  9         443  
72 9     9   57 use Cwd 'getcwd'; # run(), moves us there and back.
  9         16  
  9         440  
73 9     9   7883 use Data::Dumper;
  9         47266  
  9         655  
74 9     9   83 use POSIX ();
  9         12  
  9         21526  
75              
76             =head1 NAME
77              
78             File::Unpack - A strong bz2/gz/zip/tar/cpio/rpm/deb/cab/lzma/7z/rar/... archive unpacker, based on mime-types
79              
80             =head1 VERSION
81              
82             Version 0.70
83             =cut
84              
85             # We'll have 1.x versions only after minfree() has a baseline implementation.
86             # Please run perl Makefile.PL after changing the version here.
87             our $VERSION = '0.70';
88              
89             POSIX::setlocale(&POSIX::LC_ALL, 'C');
90             $ENV{PATH} = '/usr/bin:/bin';
91             $ENV{SHELL} = '/bin/sh';
92             delete $ENV{ENV};
93              
94             # what we name the temporary directories, while helpers are working.
95             my $TMPDIR_TEMPL = '_fu_XXXXX';
96              
97             # no longer used by the tick-tick ticker to show where we are.
98             # my $lsof = '/usr/bin/lsof';
99              
100             # Compress::Raw::Bunzip2 needs several 100k of input data, we special case this.
101             # File::LibMagic wants to read ca. 70k of input data, before it says application/vnd.ms-excel
102             # Anything else works with 1024.
103             my $UNCOMP_BUFSZ = 1024;
104              
105             # unpack will give up, after unpacking that many levels. It is more likely we
106             # got into a loop by then, than really have that many levels.
107             my $RECURSION_LIMIT = 200;
108              
109             # Suggested place, where admins should install the helpers bundled with this module.
110 7 50   7   55 sub _default_helper_dir { $ENV{FILE_UNPACK_HELPER_DIR}||'/usr/share/File-Unpack/helper' }
111              
112             # we use '=' in the mime_name, this expands to '/(x\-|ANY\+)?'
113             ##
114             ## Caution: always use (?: ... ) below for grouping, so that no extra capturing clauses are created.
115              
116             my @builtin_mime_helpers = (
117             # mimetype pattern # suffix_re # command with redirects, as defined with IPC::Run::run
118              
119             # Requires: xz bzip2 gzip unzip lzip
120             [ 'application=x-lzip', qr{(?:lz)}, [qw(/usr/bin/lzip -dc %(src)s)], qw(> %(destfile)s) ],
121             [ 'application=xz', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/lzcat)], qw(< %(src)s > %(destfile)s) ],
122             [ 'application=xz', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/xz -dc %(src)s)], qw(> %(destfile)s) ],
123             [ 'application=lzma', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/lzcat)], qw(< %(src)s > %(destfile)s) ],
124             [ 'application=lzma', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/xz -dc %(src)s)], qw(> %(destfile)s) ],
125             [ 'application=bzip2', qr{bz2}, [qw(/usr/bin/bunzip2 -dc -f %(src)s)], qw(> %(destfile)s) ],
126             [ 'application=gzip', qr{(?:gz|Z)}, [qw(/usr/bin/gzip -dc -f %(src)s)], qw(> %(destfile)s) ],
127             [ 'application=compress', qr{(?:gz|Z)}, [qw(/usr/bin/gzip -dc -f %(src)s)], qw(> %(destfile)s) ],
128              
129             # Requires: sharutils
130             [ 'text=uuencode', qr{uu}, [qw(/usr/bin/uudecode -o %(destfile)s %(src)s)] ],
131              
132             # Requires: upx
133             [ 'application=upx', qr{(?:upx\.exe|upx)}, [qw(/usr/bin/upx -q -q -q -d -o%(destfile)s %(lsrc)s) ] ],
134              
135             # xml.summary.Mono.Security.Authenticode is twice inside of monodoc-1.0.4.tar.gz/Mono.zip/ -> use -o
136             [ 'application=zip', qr{(?:zip|jar|sar)}, [qw(/usr/bin/unzip -P no_pw -q -o %(src)s)] ],
137              
138             # Requires: unrar
139             [ 'application=rar', qr{rar}, [qw(/usr/bin/unrar x -o- -p- -inul -kb -y %(src)s)] ],
140             # Requires: lha
141             [ 'application=x-lha', qr{lha}, [qw(/usr/bin/lha x -q %(src)s)] ],
142              
143             # Requires: binutils
144             [ 'application=archive', qr{(?:a|ar|deb)}, [qw(/usr/bin/ar x %(src)s)] ],
145             [ 'application=x-deb', qr{deb}, [qw(/usr/bin/ar x %(src)s)] ],
146             [ 'application=x-debian-package', qr{deb}, [qw(/usr/bin/ar x %(src)s)] ],
147              
148             # Requires: xar
149             [ 'application=x-xar', qr{pkg}, [qw(/usr/bin/xar -x -f %(src)s)] ],
150              
151             # Requires: cabextract
152             [ 'application/vnd.ms-cab-compressed', qr{cab}, [qw(/usr/bin/cabextract -q %(src)s)] ],
153              
154             # Requires: p7zip
155             [ 'application/x-7z-compressed', qr{7z}, [qw(/usr/bin/7z x -pPass -y %(src)s)] ],
156              
157             # Requires: tar rpm cpio
158             [ 'application=tar', qr{(?:tar|gem)}, [\&_locate_tar, qw(-xf %(src)s)] ],
159             [ 'application=tar+bzip2', qr{(?:tar\.bz2|tbz)}, [\&_locate_tar, qw(-jxf %(src)s)] ],
160             [ 'application=tar+gzip', qr{t(?:ar\.gz|gz)}, [\&_locate_tar, qw(-zxf %(src)s)] ],
161             # [ 'application=tar+gzip', qr{t(?:ar\.gz|gz)}, [qw(/home/testy/src/C/slowcat)], qw(< %(src)s |), [\&_locate_tar, qw(-zxf -)] ],
162             [ 'application=tar+lzma', qr{tar\.(?:xz|lzma|lz)}, [qw(/usr/bin/lzcat)], qw(< %(src)s |), [\&_locate_tar, qw(-xf -)] ],
163             [ 'application=tar+lzma', qr{tar\.(?:xz|lzma|lz)}, [qw(/usr/bin/xz -dc -f %(src)s)], '|', [\&_locate_tar, qw(-xf -)] ],
164             [ 'application=rpm', qr{(?:src\.r|s|r)pm}, [qw(/usr/bin/rpm2cpio %(src)s)], '|', [\&_locate_cpio_i] ],
165             [ 'application=cpio', qr{cpio}, [\&_locate_cpio_i], qw(< %(src)s) ],
166              
167             # Requires: poppler-tools
168             [ 'application=pdf', qr{pdf}, [qw(/usr/bin/pdftotext %(src)s %(destfile)s.txt)], '&', [qw(/usr/bin/pdfimages -j %(src)s pdfimages)] ],
169             );
170              
171             ## CAUTION keep _my_shell_quote in sync with all _locate_* functions.
172             sub _locate_tar
173             {
174 0     0   0 my $self = shift;
175 0 0       0 return @{$self->{_locate_tar}} if defined $self->{_locate_tar};
  0         0  
176              
177             # cannot use tar -C %(destdir)s, we rely on being chdir'ed inside already :-)
178             # E: /bin/tar: /tmp/xxx/_VASn/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_: Cannot chdir: Permission denied
179              
180 0 0       0 my @tar = (-f '/bin/tar' ? '/bin/tar' : '/usr/bin/tar' );
181             ## osc co loves to create directories with : in them.
182             ## Tell tar to accept such directories as directores.
183 0 0       0 push @tar, "--force-local"
184             unless $self->run([@tar, "--force-local", "--help"], { out_err => '/dev/null' });
185 0 0       0 push @tar, "--no-unquote"
186             unless $self->run([@tar, "--no-unquote", "--help"], { out_err => '/dev/null'});
187              
188 0         0 $self->{_locate_tar} = \@tar;
189 0         0 return @tar;
190             }
191              
192             sub _locate_cpio_i
193             {
194 0     0   0 my $self = shift;
195 0 0       0 return @{$self->{_locate_cpio_i}} if defined $self->{_locate_cpio_i};
  0         0  
196              
197 0         0 my @cpio_i = ('/usr/bin/cpio', '-idm');
198 0 0       0 $cpio_i[1] .= 'u'
199             unless run(['/usr/bin/cpio', '-idmu', '--usage'], {out_err => '/dev/null'});
200 0 0       0 push @cpio_i, '--sparse'
201             unless run([@cpio_i, '--sparse', '--usage'], {out_err => '/dev/null'});
202 0 0       0 push @cpio_i, '--no-absolute-filenames'
203             unless run([@cpio_i, '--no-absolute-filenames', '--usage'], {out_err => '/dev/null'});
204 0 0       0 push @cpio_i, '--force-local'
205             unless run([@cpio_i, '--force-local', '--usage'], {out_err => '/dev/null'});
206              
207 0         0 @{$self->{_locate_cpio_i}} = \@cpio_i;
  0         0  
208 0         0 return @cpio_i;
209             }
210              
211             =head1 SYNOPSIS
212              
213             This perl module comes with an executable script:
214              
215             /usr/bin/file_unpack -h
216              
217             /usr/bin/file_unpack [-1] [-m] ARCHIVE_FILE ...
218              
219              
220             File::Unpack is an unpacker for archives and files
221             (bz2/gz/zip/tar/cpio/iso/rpm/deb/cab/lzma/7z/rar ... pdf/odf) based on
222             MIME types. We call it strong, because it is not fooled by file suffixes, or
223             multiply wrapped packages. It recursively descends into each archive found
224             until it finally exposes all unpackable payload contents.
225              
226             A logfile can be written, precisely describing MIME types and unpack actions.
227              
228             use File::Unpack;
229              
230             my $log;
231             my $u = File::Unpack->new(logfile => \$log);
232              
233             my $m = $u->mime('/etc/init.d/rc');
234             print "$m->[0]; charset=$m->[1]\n";
235             # text/x-shellscript; charset=us-ascii
236              
237             map { print "$_->{name}\n" } @{$u->mime_helper()};
238             # application/%rpm
239             # application/%tar+gzip
240             # application/%tar+bzip2
241             # ...
242              
243             $u->unpack("inputfile.tar.bz2");
244             while ($log =~ m{^\s*"(.*?)":}g) # it's JSON.
245             {
246             print "$1\n"; # report all files unpacked
247             }
248              
249             ...
250              
251             Most of the known archive file formats are supported. Shell-script-style
252             plugins can be added to support additinal formats.
253              
254             Helper shell-scripts can be added to support additional mime-types. Example:
255              
256             F<< $ echo "ar x $1" > /usr/share/File-Unpack/helper/application=x-debian-package >>
257              
258             F<< $ chmod a+x /usr/share/File-Unpack/helper/application=x-debian-package >>
259              
260             This example creates a trivial external equivalent of the builtin MIME helper for *.deb packages.
261             For details see the documentation of the C method.
262              
263             C examines the contents of an archive file or directory using an extensive
264             mime-type analysis. The contents is unpacked recursively to the given destination
265             directory; a listing of the unpacked files is reported through the built in
266             logging facility during unpacking. Most common archive file formats are handled
267             directly; more can easily be added as mime-type helper plugins.
268              
269             =head1 SUBROUTINES/METHODS
270              
271             =head2 new
272              
273             my $u = new(destdir => '.', logfile => \*STDOUT, maxfilesize => '2G', verbose => 1,
274             world_readable => 0, one_shot => 0, no_op => 0, archive_name_as_dir => 0,
275             follow_file_symlinks => 0,
276             log_params => {}, log_type => 'JSON');
277              
278             Creates an unpacker instance. The parameter C must be a writable location; all output
279             files and directories are placed inside this destdir. Subdirectories will be
280             created in an attempt to reflect the structure of the input. Destdir defaults
281             to the current directory; relative paths are resolved immediatly, so that
282             chdir() after calling new is harmless.
283              
284             The parameter C can be a reference to a scalar, a filename, or a filedescriptor.
285             The logfile starts with a JSON formatted prolog, where all lines start
286             with printable characters.
287             For each file unpacked, a one line record is appended, starting with a single
288             whitespace ' ', and terminated by "\n". The format is a JSON-encoded C<< "key":
289             {value},\n >> pair, where key is the filename, and value is a hash including 'mime',
290             'size', and other information.
291             The logfile is terminated by an epilog, where each line starts with a printable character.
292             As part of the epilog, a dummy file named "\" with an empty hash is added to the list.
293             It should be ignored while parsing.
294             Per default, the logfile is sent to STDOUT.
295              
296             The parameter C is a safeguard against compressed sparse files and
297             test-files for archivers. Such files could easily fill up any available disk
298             space when unpacked. Files hitting this limit will be silently truncated.
299             Check the logfile records or epilog to see if this has happened. BSD::Resource
300             is used manipulate RLIMIT_FSIZE.
301              
302             The parameter C can optionally be set to non-zero, to limit unpacking
303             to one step of unpacking. Unpacking of well known compressed archives like
304             e.g. '.tar.bz2' is considered one step only. If uncompressing an archive is
305             considered an extra step before unpacking the archive depends on the configured
306             mime helpers.
307              
308             The parameter C causes unpack() to only print one shell command to
309             STDOUT and exit. This implies one_shot=1.
310              
311             The parameter C causes unpack() change all directories to 0755,
312             and all files to 444. Otherwise 0700 and 0400 (user readable) is asserted.
313              
314             The parameter C causes some or all symlinks to files
315             to be included.
316             A value of 1 follows symlinks that exist in the input directory and point to a file.
317             This has no effect if the input is an archive file. A value of 2 also follows symlinks
318             that were extracted from archives. CAUTION: This may cause unpack() to visit
319             files or archives elsewhere in the local filesystem.
320             Directory symlinks are always excluded.
321              
322             The parameter C causes the unpacker to store all unpacked
323             files inside a directory with the same name as their archive.
324              
325             The default depends on how many files are unpacked from the archive: If exactly one
326             file (or one toplevel directory) is unpacked, then no extra directory is used.
327             E.g. F would unpack to F or
328             F would unpack to F and no files outside this directory.
329             If multiple files (or directories) are unpacked, and the suffix of the archive can
330             be removed with the C of its C, then the
331             shortened name is used as a directory. E.g. F would unpack to
332             F. Otherwise F<._> is appended to the archive name. E.g. F would unpack to
333             F.
334              
335             In any case, the suffix F<._> or F<._B> is used to avoid conflicts with
336             already existing names where B is a numeric value.
337              
338             =head2 exclude
339              
340             exclude(add => ['.svn', '*.orig' ], del => '.svn', force => 1, follow_file_symlinks => 0)
341              
342             Defines the exclude-list for unpacking. This list is advisory for the MIME helpers.
343             The exclude-list items are shell glob patterns, where '*' or '?' never match '/'.
344              
345             You can use force to have any of these removed after unpacking.
346             Use (vcs => 1) to exclude a long list of known version control system directories, use (vcs => 0) to remove them.
347             The default is C<< exclude(empty => 1) >>, which is the same as C<< exclude(empty_file => 1, empty_dir => 1) >> --
348             having the obvious meaning.
349              
350             (re => 1) returns the active exclude-list as a regexp pattern.
351             Otherwise C always returns the list as an array ref.
352              
353             Some symbolic links are included if {follow_file_symlinks} is nonzero. For details see C<>.
354              
355             If exclude patterns were effective, or if symlinks, fifos, sockets, ... were encountered during unpack(),
356             the logfile contains an additional 'skipped' keyword with statistics.
357              
358             =cut
359              
360             sub _glob_list_re
361             {
362 3     3   3 my @re;
363 3 50       16 return unless @_;
364 3         9 for my $text (@_)
365             {
366             # Taken from pdb2perl:glob2re() and adapted, to not match slashes in wildcards.
367             # This should be kept compatible with tar --exclude .
368              
369 25         76 $text =~ s{([\.\(\)\[\]\{\}])}{\\$1}g; ## protect magic re characters.
370 25         34 $text =~ s{\*}{[^/]*}g; ## * -> [^/]*
371 25         22 $text =~ s{\?}{[^/]}g; ## ? -> [^/]
372 25         33 push @re, $text;
373             }
374 3         22 return '(/|^)(' . join('|', @re) . ')(/|$)';
375             }
376              
377             sub _not_excluded
378             {
379 12     12   13 my $self = shift;
380 12         20 my ($dir, $file) = @_;
381              
382 12 50       30 return 1 unless my $re = $self->{exclude}{re};
383              
384 12   50     22 $dir ||= '';
385 12 50       40 $dir .= '/' unless $dir =~ m{/$};
386 12         20 $file = $dir . $file;
387              
388 12 100       153 return 0 if $file =~ m{$re};
389 2         20 return 1;
390             }
391              
392             sub exclude
393             {
394 3     3 1 24 my $self = shift;
395 3 50       140 my %opt = $#_ ? @_ : (add => $_[0]);
396            
397             # ADD to this list from: https://build.opensuse.org/project/show?project=devel%3Atools%3Ascm
398 3         13 my @vcs = qw(SCCS RCS CVS .svn .git .hg .osc);
399              
400 3 50       13 $opt{add} = [ $opt{add} ] unless ref $opt{add};
401 3 50       15 $opt{del} = [ $opt{del} ] unless ref $opt{del};
402              
403 3 50 33     24 push @{$opt{add}}, @vcs if defined $opt{vcs} and $opt{vcs};
  3         12  
404 3 50 33     28 push @{$opt{del}}, @vcs if defined $opt{vcs} and !$opt{vcs};
  0         0  
405              
406              
407 3         6 for my $a (@{$opt{add}})
  3         10  
408             {
409 25 50       87 $self->{exclude}{list}{$a}++ if defined $a;
410             }
411            
412 3         7 for my $a (@{$opt{del}})
  3         11  
413             {
414 3 50       12 delete $self->{exclude}{list}{$a} if defined $a;
415             }
416              
417 3         6 my @list = sort keys %{$self->{exclude}{list}};
  3         33  
418 3         13 $self->{exclude}{re} = _glob_list_re(@list);
419              
420 3 50       11 $opt{empty_dir} = $opt{empty_file} = $opt{empty} if defined $opt{empty};
421              
422 3         7 for my $o (qw(empty_file empty_dir force))
423             {
424 9 50       25 $self->{exclude}{$o} = $opt{$o} if defined $opt{$o};
425             }
426              
427 3 50       9 $self->{follow_file_symlinks} = $opt{follow_file_symlinks}
428             if defined $opt{follow_file_symlinks};
429              
430 3 100       21 return $opt{re} ? $self->{exclude}{re} : \@list;
431             }
432              
433             =begin private
434              
435             =item log, logf, loggable_pathname
436              
437             The C method is used by C to send text to the logfile.
438             The C method takes a filename and a hash, and logs a JSON formatted line.
439             The trailing newline character of a line is delayed; it is printed by the next call to
440             C or C. In case of C, a comma is emitted before the newline
441             from the second call onward.
442              
443             The C shortens a path to be relative to either
444             $self->{destdir} or $self->{input} unless $self->{log_fullpath} is true.
445             If a hash is provided as a second parameter and the path was found to be relative
446             to $self->{input}, then an entry { 'srcdir' => 'input' } is added to this hash.
447              
448             =end private
449              
450             =cut
451             sub log
452             {
453 4     4 1 8 my ($self, $text) = @_;
454 4 50       13 if (my $fp = $self->{lfp})
455             {
456 4         5 my $oldpos = eval { $fp->tell; }; # old perl at SLES11 has no IO::Handle::tell()
  4         34  
457 4 50       39 $fp->write($text) or die "log($self->{logfile}): write failed: $!\n";
458 4         580 my $r = eval { $fp->tell - $oldpos; };
  4         13  
459              
460             ## We do not expect any multibyte utf8 issues in here. It is plain 7-bit JSON.
461             ## E.g. /dev/null is not seekable. Be forgiving.
462 4 50 33     38 die "$oldpos,$r=log($self->{logfile}): write failed: $text\n" if $r and $r != length($text);
463 4         17 $self->{lfp_printed}++;
464             }
465             }
466              
467             sub loggable_pathname
468             {
469 0     0 1 0 my ($self, $file, $hash) = @_;
470              
471 0 0       0 unless ($self->{log_fullpath})
472             {
473             # very frequently, files are inside the destdir
474 0 0       0 unless ($file =~ s{^\Q$self->{destdir}\E/}{})
475             {
476             # less frequently, archives are logged inside the input dir
477 0 0       0 if ($self->{input})
478             {
479 0 0       0 if ($file =~ s{^\Q$self->{input}\E/}{\./input/./})
480             {
481 0 0       0 $hash->{srcdir} = 'input' if ref $hash eq 'HASH';
482             }
483             }
484             }
485             }
486 0         0 return $file;
487             }
488              
489             sub logf
490             {
491 0     0 1 0 my ($self,$file,$hash,$suff) = @_;
492 0 0       0 $suff = "" unless defined $suff;
493 0   0     0 my $json = $self->{json} ||= JSON->new()->ascii(1);
494 0         0 $file = $self->loggable_pathname($file, $hash);
495 0 0       0 if (my $fp = $self->{lfp})
496             {
497 0 0       0 if ($self->{log_type} eq 'plain')
498             {
499 0         0 my $str = $file . ' (';
500 0 0       0 $str .= $hash->{mime} if defined $hash->{mime};
501 0         0 $str .= ')';
502 0 0       0 $str = "# $str -> " . $hash->{unpacked} if $hash->{unpacked};
503 0         0 $str .= "\n";
504 0         0 $self->log($str);
505             }
506             else
507             {
508 0 0       0 $self->log(qq[{ "oops": "logf used before prolog??",\n"unpacked_files":{\n])
509             unless $self->{lfp_printed}; # sysseek($fp, 0, 1); # }} there is no systell() ...
510 0         0 my $str = $json->encode({$file => $hash});
511 0         0 $str =~ s{^\{}{}s;
512 0         0 $str =~ s{\}$}{}s;
513 0         0 my $pre = " ";
514 0 0       0 $pre = ",\n " if $self->{logf_continuation}++;
515 0 0       0 die "logf failed to encode newline char: $str\n" if $str =~ m{(?:\n|\r)};
516 0         0 $self->log("$pre$str$suff");
517             }
518             }
519             }
520              
521             $SIG{'XFSZ'} = sub
522             {
523             print STDERR "soft RLIMIT_FSIZE exceeded. SIGXFSZ recieved. Exiting\n";
524             exit;
525             };
526              
527             # if this returns 0, we test again and call it again, possibly.
528             # if this returns nonzero, we just continue.
529             sub _default_fs_warn
530             {
531 0     0   0 carp "Filesystem (@_) is almost full.\n $0 paused for 30 sec.\n";
532 0         0 sleep(30);
533 0         0 return 0;
534             }
535              
536             ## returns 1, if enough space free.
537             ## returns 0, if warn-method was called, and returned nonzero
538             ## returns -1, if no warn method
539             ## or does not return at all, and rechecks the status
540             ## with at least on second delay, if warn-method returns 0.
541             sub _fs_check
542             {
543 0     0   0 my ($self, $needed_b, $needed_i, $needed_p) = @_;
544 0 0       0 $needed_b = '1M' unless defined $needed_b; # bytes
545 0 0       0 $needed_i = 100 unless defined $needed_i; # inodes
546 0 0       0 $needed_p = 1.0 unless defined $needed_p; # percent
547 0         0 $needed_b = _bytes_unit($needed_b);
548              
549 0         0 my $DIR;
550 0 0 0     0 open $DIR, "<", $self->{destdir} or
551             opendir $DIR, $self->{destdir} or return;
552             ## fileno() does not work with opendir() handles.
553 0 0       0 my $fd = fileno($DIR); return unless defined $fd;
  0         0  
554              
555 0         0 for (;;)
556             {
557 0         0 my $st = eval { [ fstatvfs($fd) ] };
  0         0  
558 0         0 my $total_b = $st->[1] * $st->[2]; # f_frsize * f_blocks
559 0         0 my $free_b = $st->[0] * $st->[4]; # f_bsize * f_bavail
560 0         0 my $free_i = $st->[7]; # f_favail
561 0   0     0 my $perc = 100.0 * ($total_b - $free_b) / ($total_b||1);
562              
563 0 0 0     0 return 1 if $free_b >= $needed_b &&
      0        
564             $free_i >= $needed_i &&
565             (100-$perc > $needed_p);
566            
567 0 0       0 return -1 unless $self->{fs_warn};
568 0         0 my $w = $self->{fs_warn}->($self->{destdir}, $perc, $free_b, $free_i);
569 0 0       0 return 0 if $w;
570 0         0 sleep 1;
571             }
572             }
573              
574             sub new
575             {
576 7     7 1 2654 my $self = shift;
577 7   33     54 my $class = ref($self) || $self;
578 7 50       43 my %obj = (ref $_[0] eq 'HASH') ? %{$_[0]} : @_;
  0         0  
579              
580 7 100       32 $obj{verbose} = 1 unless defined $obj{verbose};
581 7   100     43 $obj{destdir} ||= '.';
582 7   100     37 $obj{logfile} ||= \*STDOUT;
583 7   50     36 $obj{log_type} ||= 'json'; # or 'plain'
584 7         21 $obj{log_type} = lc $obj{log_type};
585 7 50 50     57 $obj{maxfilesize} = $ENV{'FILE_UNPACK_MAXFILESIZE'}||'2.5G' unless defined $obj{maxfilesize};
586 7         26 $obj{maxfilesize} = _bytes_unit($obj{maxfilesize});
587 7         51 $ENV{'FILE_UNPACK_MAXFILESIZE'} = $obj{maxfilesize}; # so that children see the same.
588              
589 7         271 mkpath($obj{destdir}); # abs_path is unreliable if destdir does not exist
590 7         446 $obj{destdir} = Cwd::fast_abs_path($obj{destdir});
591 7         47 $obj{destdir} =~ s{(.)/+$}{$1}; # assert no trailing '/'.
592              
593             # used in unpack() to jail mime_helpers deep inside destdir:
594 7 50       34 $obj{dot_dot_safeguard} = 20 unless defined $obj{dot_dot_safeguard};
595 7   50     42 $obj{jail_chmod0} ||= 0;
596             # used in unpack, print only:
597 7   50     39 $obj{no_op} ||= 0;
598             # used in unpack, blocks recursion after archive unpacking:
599 7   33     34 $obj{one_shot} ||= $obj{no_op};
600              
601             # With $self->{within_archives} we know the difference between symlinks found in
602             # the given repository or symlinks that were unpacked from an archive.
603             # Those from an archive are followed only with follow_file_symlinks == 2.
604 7   50     40 $obj{follow_file_symlinks} ||= 0;
605              
606 7 100 66     436 warn "WARNING: We are running as root: Malicious archives may clobber your filesystem.\n" if $obj{verbose} and !$>;
607              
608 7 100 100     64 if (ref $obj{logfile} eq 'SCALAR' or !(ref $obj{logfile}))
609             {
610 3 50   1   80353 open $obj{lfp}, ">", $obj{logfile} or croak "open logfile $obj{logfile} failed: $!\n";
  1         6  
  1         1  
  1         6  
611             }
612             else
613             {
614 4         14 $obj{lfp} = $obj{logfile};
615             }
616             # make $obj{lfp} unbuffered, so that other processes can read line by line...
617 7         994 $obj{lfp}->autoflush(1);
618 7         358 $obj{lfp_printed} = 0;
619              
620 7         22 $obj{readable_file_modes} = [ 0400 ];
621 7         17 $obj{readable_dir_modes} = [ 0700, 0500 ];
622              
623 7 50       32 if ($obj{world_readable})
624             {
625 0         0 unshift @{$obj{readable_file_modes}}, 0444;
  0         0  
626 0         0 unshift @{$obj{readable_dir_modes}}, 0755;
  0         0  
627             }
628              
629 7 50       25 if ($obj{maxfilesize})
630             {
631             eval
632 7         11 {
633 9     9   80 no strict;
  9         21  
  9         2469  
634             # helper/application=x-shellscript calls File::Unpack->new(), with defaults...
635 7         74 my @have = BSD::Resource::getrlimit(RLIMIT_FSIZE);
636 7 50 33     6787 if ($have[0] == RLIM_INFINITY or $have[0] > $obj{maxfilesize})
637             {
638             # if RLIM_INFINITY is seen as an attempt to increase limits, we would fail. Ignore this.
639 7 50 33     206 BSD::Resource::setrlimit(RLIMIT_FSIZE, $obj{maxfilesize}, RLIM_INFINITY) or
640             BSD::Resource::setrlimit(RLIMIT_FSIZE, $obj{maxfilesize}, $obj{maxfilesize}) or
641             warn "RLIMIT_FSIZE($obj{maxfilesize}), limit=($have[0],$have[1]) failed\n";
642             }
643             };
644 7 50       2179 if ($@)
645             {
646 0         0 carp "WARNING maxfilesize=$obj{maxfilesize} ignored:\n $@ $!\n Maybe package perl-BSD-Resource is not installed??\n\n";
647             }
648             }
649              
650 7 50       48 $obj{minfree}{factor} = 10 unless defined $obj{minfree}{factor};
651 7 50       33 $obj{minfree}{bytes} = '1M' unless defined $obj{minfree}{bytes};
652 7 50       27 $obj{minfree}{percent} = '1%' unless defined $obj{minfree}{percent};
653 7   50     63 minfree(\%obj, warning => $obj{fs_warn}||\&_default_fs_warn);
654              
655 7 50       38 $obj{exclude}{empty_dir} = 1 unless defined $obj{exclude}{empty_dir};
656 7 50       28 $obj{exclude}{empty_file} = 1 unless defined $obj{exclude}{empty_file};
657              
658 7         24 $self = bless \%obj, $class;
659              
660 7         19 for my $h (@builtin_mime_helpers)
661             {
662 189         381 $self->mime_helper(@$h);
663             }
664 7 50       41 $obj{helper_dir} = _default_helper_dir unless exists $obj{helper_dir};
665 7 50 33     221 $self->mime_helper_dir($obj{helper_dir}) if defined $obj{helper_dir} and -d $obj{helper_dir};
666              
667 7 50       50 unless ($ENV{PERL5LIB})
668             {
669             # in case we are using non-standard perl lib dirs, put them into the environment,
670             # so that any helper scripts see them too. They might need them, if written in perl.
671              
672 9     9   50 use Config;
  9         14  
  9         76324  
673 7         4397 my $pat = qr{^(?:\Q$Config{vendorlib}\E|\Q$Config{sitelib}\E|\Q$Config{privlib}\E)\b};
674 7         17468 my @add; # all dirs, that come before the standard dirs.
675 7         26 for my $i (@INC)
676             {
677 56 100       186 last if $i =~ m{$pat};
678 49         70 push @add, $i;
679             }
680 7 50       118 $ENV{PERL5LIB} = join ':', @add if @add;
681             }
682              
683 7         39 return $self;
684             }
685              
686             sub DESTROY
687             {
688 7     7   5975 my $self = shift;
689             # when unpack() processes an input, it should delete {lfp} afterwards.
690             # Added some 'or' cases, as $self->{input} might be empty, although we had processed an input.
691             #
692             # We rather catch an error, than produce incomplete output.
693             # This happens with ksh/ast-base.2012-08-01.tar.bz2 after unpack('.../ast-base.2012-08-01/src/cmd/pax/data/a'): not much file or directory
694             #
695 7 100 66     176 if (($self->{input} or
      66        
696             ($self->{lfp_printed}||0) or
697             ($self->{recursion_level}||0)) and $self->{lfp})
698             {
699 2 50       9 if ($self->{log_type} eq 'plain')
700             {
701             # pass
702             }
703             else
704             {
705 2 50       6 $self->log(sprintf(qq[{"pid":"%d", "unpacked":{], $$)) unless $self->{lfp_printed};
706             }
707 2   50     8 my $r = $self->{recursion_level}||0;
708              
709             # this should never happen.
710             # always delete $self->{lfp} manually, when done.
711             ## {{
712 2         3 my $msg = "unexpected destructor seen";
713 2 50       9 $msg = join('; ', @{$self->{error}}) if $self->{error};
  0         0  
714 2 50       5 if ($self->{log_type} eq 'plain')
715             {
716 0         0 $self->log("# error: (l=$self->{lfp_printed},r=$r): $msg\n");
717             }
718             else
719             {
720 2         25 $self->log(qq[\n}, "error":"(l=$self->{lfp_printed},r=$r): $msg"}\n]);
721             }
722 2 50       35 close $self->{lfp} if $self->{lfp} ne $self->{logfile};
723 2         12 delete $self->{lfp};
724 2         6 delete $self->{lfp_printed};
725             }
726 7 50       485 if ($self->{configdir})
727             {
728 0         0 rmtree($self->{configdir});
729 0         0 delete $self->{configdir};
730             }
731             }
732              
733             =head2 unpack
734              
735             $u->unpack($archive, [$destdir])
736              
737             Determines the contents of an archive and recursivly extracts its files.
738             An archive may be the pathname of a file or directory. The extracted contents will be
739             stored in F, where dest_name is the filename
740             component of archive without any leading pathname components, and possibly
741             stripped or added suffix. (Subdir defaults to ''.) If archive is a directory,
742             then dest_name will also be a directory. If archive is a file, the type of
743             dest_name depends on the type of packing: If the archive expands to multiple
744             files, dest_name will be a directory, otherwise it will be a file. If a file of
745             the same name already exists in the destination subdir, an additional subdir
746             component is created to avoid any conflicts.
747              
748             For each extracted file, a record is written to the logfile.
749             When unpacking is finished, the logfile contains one valid JSON structure.
750             Unpack achieves this by writing suitable prolog and epilog lines to the logfile.
751             The logfile can also be parsed line by line. All file records is one line and start
752             with a ' ' whitespace, and end in a ',' comma. Everything else is prolog or epilog.
753              
754             The actual unpacking is dispatched to MIME type specific helpers,
755             selected using C. A MIME helper can either be built-in code, or an
756             external shell-script found in a directory registered with
757             C. The standard place for external helpers is
758             F; it can be changed by the environment variable
759             F or the C parameter C.
760              
761             The naming of helper scripts is described under C.
762              
763             A MIME helper must have executable permission and is called with 6 parameters:
764             source_path, destfile, destination_path, mimetype, description, and config_dir.
765             Note, that destination_path is a freshly created empty working directory, even
766             if the unpacker is expected to unpack only a single file. The unpacker is
767             called after chdir into destination_path, so you usually do not need to
768             evaluate the third parameter.
769              
770             The directory C contains unpack configuration in .sh, .js and possibly
771             other formats. A MIME helper may use this information, but need not.
772             All data passed into C is reflected there, as well as the active exclude-list.
773             Using the config information can help a MIME helper to skip unwanted
774             work or otherwise optimize unpacking.
775              
776             C monitors the available filesystem space in destdir. If there is less space
777             than configured with C, a warning can be printed and unpacking is
778             optionally paused. It also monitors the MIME helpers progress reading the archive
779             at source_path and reports percentages to STDERR (if verbose is 1 or more).
780              
781             After the MIME helper is finished, C examines the files it created.
782             If it created no files in F, an error is reported, and the
783             F may be passed to other unpackers, or finally be added to the log as is.
784              
785             If the MIME helper wants to express that F is already unpacked as far as possible
786             and should be added to the log without any error messages, it creates a symbolic link
787             F pointing to F.
788              
789              
790             The system considers replacing the
791             directory with a file, if all of the following conditions are met:
792              
793             =over
794              
795             =item *
796              
797             There is exactly one file in the directory.
798              
799             =item *
800              
801             The file name is identical with the directory name,
802             except for one changed or removed
803             suffix-word. (*.tar.gz -> *.tar; or *.tgz -> *.tar)
804              
805             =item *
806              
807             The file must not already exist in the parent directory.
808              
809             =back
810              
811             C prepares 20 empty subdirectory levels and chdirs the unpacker
812             in there. This number can be adjusted using C<< new(dot_dot_safeguard => 20) >>.
813             A directory 20 levels up from the current working dir has mode 0 while
814             the MIME helper runs. C can optionally chmod(0) the parent of the subdirectory
815             after it chdirs the unpacker inside. Use C<< new(jail_chmod0 => 1) >> for this, default
816             is off. If enabled, a MIME helper trying to place files outside of the specified
817             destination_path may receive 'permission denied' conditions.
818              
819             These are special hacks to keep badly constructed
820             tar-balls, cpio-, or zip-archives at bay.
821              
822             Please note, that this can help against archives containing relative paths
823             (like starting with '../../../foo'), but will be ineffective with absolute paths
824             (starting with '/foo').
825             It is the responsibility of MIME helpers to not create absolute paths;
826             C should not be run as the root user, to minimize the risk of
827             compromising the root filesystem.
828              
829             A missing MIME helper is skipped, and subsequent helpers may take effect. A
830             MIME helper is expected to return an exit status of 0 upon success. If it runs
831             into a problem, it should print lines
832             starting with the affected filenames to stderr.
833             Such errors are recorded in the log with the unpacked archive, and as far as
834             files were created, also with these files.
835              
836             Symbolic links are ignored while unpacking.
837              
838             Currently you can call C only once.
839              
840             =cut
841              
842             sub unpack
843             {
844             ## as long as $archive is outside $self->{destdir}, we construct our destdir by
845             ## replacing $self->{input_dir} with $self->{destdir}.
846             ## This $self->{input_dir} must be created and kept constant at the earliest
847             ## possible call.
848             ## When the $archive is inside $self->{destdir}, we do not use $self->{input_dir},
849             ## we then use the current $in_dir as destdir.
850             ##
851             ## Whenever an archive path outside $self->{destdir} is found,
852             ## it is first passed through Cwd::fast_abs_path before any other processing occurs.
853             ##
854 15     15 1 1082 my ($self, $archive, $destdir) = @_;
855 15 100       35 $destdir = $self->{destdir} unless defined $destdir;
856              
857 15 50       75 $destdir = $1 if $destdir =~ m{^(.*)$}s; # brute force untaint
858              
859 15 50 100     45 if (($self->{recursion_level}||0) > $RECURSION_LIMIT)
860             {
861 0         0 push @{$self->{error}}, "unpack('$archive','$destdir'): recursion limit $RECURSION_LIMIT";
  0         0  
862             ## this is only an emergency stop.
863 0         0 return 1;
864             }
865              
866 15 50 66     106 if ($archive !~ m{^/} or $archive !~ m{^\Q$self->{destdir}\E/})
867             {
868             # Cwd::fast_abs_path($archive) not only makes nice absolute paths, but it also expands
869             # file symlinks. This is a bad idea for two reasons:
870             # * when we allow {follow_file_symlinks} the link destination gets into the log file,
871             # rather than the (expected) link itself.
872             # * Also, this could easily trigger "path escaped" below .
873             ######
874 15 50 33     38 if ($self->{follow_file_symlinks} && $archive =~ m{^(.*)/(.*?)$})
875             {
876             # we solve both issues by doing this:
877             # chop off the filename; expand the path; re-add the filename.
878 0         0 my ($a_path, $a_file) = ($1,$2);
879 0 0       0 $a_path = Cwd::fast_abs_path($a_path) if -e $a_path;
880 0         0 $archive = $a_path . '/' . $a_file;
881             }
882             else
883             {
884 15 50       1255 $archive = Cwd::fast_abs_path($archive) if -e $archive;
885             }
886             }
887              
888 15         30 my $start_time = time;
889 15 100       49 if ($self->{recursion_level}++ == 0)
890             {
891 2 50       8 print STDERR "unpack: starting...\n" if $self->{verbose} > 1;
892             ## State that needs to be reset when (re)starting goes in here.
893             #
894             # CAUTION: recursion_level decrements again, as we return from unpack()
895             # how do we assert, that this code only runs at the start,
896             # and not once again at the end?
897 2         5 $self->{inside_archives} = 0;
898 2   33     66 $self->{json} ||= JSON->new()->ascii(1); # used often, create it unconditionally here and once.
899 2         6 $self->{iput} = $archive;
900 2         4 $self->{progress_tstamp} = $start_time;
901 2 50       21 ($self->{input_dir}, $self->{input_file}) = ($1, $2) if $archive =~ m{^(.*)/([^/]*)$};
902              
903 2 50       16 if ($self->{log_type} eq 'plain')
904             {
905             # pass
906             }
907             else
908             {
909             # logfile prolog
910 2         340 my $prolog = {destdir=>$self->{destdir}, fu=>$VERSION, pid=>$$, input => $archive, start => scalar localtime};
911 2 50       5 $prolog->{params} = $self->{log_params} if keys %{$self->{log_params}};
  2         13  
912 2         42 my $s = $self->{json}->encode($prolog);
913 2         81 $s =~ s@}$@, "unpacked":{\n@;
914 2         8 $self->log($s);
915             }
916             }
917              
918 15 50       164 unless (-e $archive)
919             {
920             # contstucted $archive wrongly
921             # e.g. we have 'pax/data/a/' instead of 'pax/data/_fu_3CEuA/a/'
922 0         0 push @{$self->{error}}, "unpack('$archive'): not much file or directory; ";
  0         0  
923 0         0 return 1;
924             }
925              
926 15 50       40 unless ($self->{input_dir})
927             {
928 0         0 push @{$self->{error}}, "unpack('$archive'); internal error: no {input_dir}";
  0         0  
929 0         0 return 1;
930             }
931              
932 15         22 my ($in_dir, $in_file) = ('/', '');
933 15 50       87 ($in_dir, $in_file) = ($1, $2) if $archive =~ m{^(.*/)([^/]*)$};
934              
935 15         17 my $inside_destdir = 1;
936 15         13 my $subdir = $in_dir; # remainder after stripping $orig_archive_prefix / $self->{destdir}
937 15 50       102 unless ($subdir =~ s{^\Q$self->{destdir}\E/+}{})
938             {
939 15         16 $inside_destdir = 0;
940 15 50       88 die "$archive path escaped. Neither inside original $self->{input_dir} nor inside destdir='$self->{destdir}'\n"
941             unless $subdir =~ s{^\Q$self->{input_dir}\E/+}{};
942             }
943              
944 15 50       29 print STDERR "unpack: r=$self->{recursion_level} in_dir=$in_dir, in_file=$in_file, destdir=$destdir\n" if $self->{verbose} > 1;
945              
946 15         13 my @missing_unpacker;
947              
948 15 50       31 if ($self->{progress_tstamp} + 10 < $start_time)
949             {
950 0   0     0 printf "T: %d files ...\n", $self->{file_count}||0;
951 0         0 $self->{progress_tstamp} = $start_time;
952             }
953              
954 15 100       321 if (-d $archive)
    50          
955             {
956 3         5 $self->_chmod_add($archive, @{$self->{readable_dir_modes}});
  3         14  
957 3 50       94 if (opendir DIR, $archive)
958             {
959 3 100       115 my @f = sort grep { $_ ne '.' && $_ ne '..' } readdir DIR;
  50         191  
960 3         40 closedir DIR;
961 3 50       14 print STDERR "dir = @f\n" if $self->{verbose} > 1;
962 3         8 for my $f (@f)
963             {
964 13 100 66     196 if ($self->{exclude}{re} && $f =~ m{$self->{exclude}{re}})
965             {
966 10         14 $self->{skipped}{exclude}++;
967             }
968 13         43 my $new_in = "$archive/$f";
969             ## if $archive is $inside_destdir, then $archive is normally indentical to $destdir.
970             ## ($inside_destdir means inside $self->{destdir}, actually)
971 13 100       14 my $new_destdir = $destdir; $new_destdir .= "/$f" if -d $new_in;
  13         179  
972 13         108 my $symlink_to_skip = -l $new_in;
973 13 50       24 my $dangeous_symlink = $self->{inside_archives} ? 1 : 0;
974 13 50 33     26 if ($symlink_to_skip and ($self->{follow_file_symlinks} > $dangeous_symlink))
975             {
976 0 0       0 $symlink_to_skip = 0 if -f $new_in;
977             # directory and dead symlinks we always skip.
978             # directory symlinks could cause us to recurse out of the current tree.
979             }
980              
981 13 50 66     132 if ($symlink_to_skip)
    50          
982             {
983             # test -l first, as -f could be also true here...
984 0 0       0 print STDERR "symlink $new_in: skipped\n" if $self->{verbose} > 1;
985 0         0 $self->{skipped}{symlink}++;
986             }
987             elsif (-f $new_in or -d _)
988             {
989 13         81 $self->unpack($new_in, $new_destdir);
990             }
991             else
992             {
993 0 0       0 print STDERR "special file $new_in: skipped\n" if $self->{verbose} > 1;
994 0         0 $self->{skipped}{device_node}++;
995             }
996 10         20 $self->{progress_tstamp} = time;
997             }
998             }
999             else
1000             {
1001 0         0 push @{$self->{error}}, "unpack dir ($archive) failed: $!";
  0         0  
1002             }
1003             }
1004             elsif (-f $archive)
1005             {
1006 12 100 66     31 if ($self->_not_excluded($subdir, $in_file) and
1007             !defined($self->{done}{$archive}))
1008             {
1009 2         5 $self->_chmod_add($archive, @{$self->{readable_file_modes}});
  2         14  
1010              
1011 2         10 my $m = $self->mime($archive);
1012 0         0 my ($h, $more) = $self->find_mime_helper($m);
1013 0         0 my $data = { mime => $m->[0] };
1014 0 0       0 if ($more)
1015             {
1016 0         0 $data->{found} = $more;
1017 0 0       0 push @missing_unpacker, @{$more->{missing}} if $more->{missing};
  0         0  
1018             }
1019              
1020 0 0 0     0 if ($m->[0] eq 'text/plain' or !$h)
1021             {
1022             # not really an archive.
1023 0 0       0 unless ($archive =~ m{^\Q$self->{destdir}\E/})
1024             {
1025 0 0       0 mkpath($destdir) unless $self->{no_op};
1026 0         0 my $destdir_in_file;
1027 0 0       0 $destdir_in_file = $1 if "$destdir/$in_file" =~ m{^(.*)$}s; # brute force untaint
1028              
1029 0 0       0 if (-e "$destdir_in_file")
1030             {
1031 0 0       0 print STDERR "unpack copy in: $destdir_in_file already exists, " if $self->{verbose};
1032 0         0 $destdir = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir);
1033 0 0       0 $destdir_in_file = $1 if "$destdir/$in_file" =~ m{^(.*)$}s; # brute force untaint
1034 0 0       0 print STDERR "using $destdir_in_file instead.\n" if $self->{verbose};
1035             }
1036 0 0       0 $data->{error} = "copy($archive): $!" unless File::Copy::copy($archive, $destdir_in_file);
1037 0         0 $self->logf($destdir_in_file => $data);
1038             }
1039             else
1040             {
1041 0         0 $self->logf($archive => $data);
1042             }
1043 0         0 $self->{file_count}++;
1044             }
1045             else
1046             {
1047             # really an archive.
1048 0 0       0 if ($self->{archive_name_as_dir})
1049             {
1050 0 0       0 print STDERR "archive_name_as_dir: expanding destdir $destdir\n" if $self->{verbose};
1051 0         0 $destdir = _unused_pathname($destdir, $in_file);
1052 0 0       0 print STDERR "archive_name_as_dir: to $destdir\n" if $self->{verbose};
1053             }
1054 0 0       0 mkpath($destdir) unless $self->{no_op};
1055 0 0       0 $self->{configdir} = $self->_prep_configdir() unless exists $self->{configdir};
1056              
1057             ## new_name is a suggestion for the mime_helper only.
1058 0         0 my $new_name = $in_file;
1059            
1060             # Either shorten the name from e.g. foo.txt.bz2 to foo.txt or append
1061             # something: foo.pdf to foo.pdf._;
1062             # Normally a suffix is appended by '.', but we also see '-' or '_' in real life.
1063 0 0 0     0 unless ($h->{suffix_re} and $new_name =~ s{[\._-]$h->{suffix_re}(?:\._\d*)?$}{}i)
1064             {
1065             # avoid unary notation of recursion couning. There may be a 256 char limit per
1066             # directory entry. Start counting in decimal, if two or more.
1067             # Hmm, the /e modifier is not mentioned in perlre, but it works. Is it deprecated??
1068 0         0 $new_name .= "._";
1069 0         0 $new_name =~ s{\._\._$}{\._2};
1070 0         0 $new_name =~ s{\._(\d+)\._$}{ "._".($1+1) }e;
  0         0  
1071             }
1072              
1073             ## if consumer of logf wants to do progress indication himself,
1074             ## then tell him what we do before we start. (Our timer tick code may be an alternative...)
1075             #
1076             # if ($archive =~ m{^\Q$self->{destdir}\E})
1077             # {
1078             # $self->logf($archive => { unpacking => $h->{fmt_p} });
1079             # }
1080            
1081 0         0 my ($unpacked, $diag) =
1082             $self->_run_mime_helper($h, $archive, $new_name, $destdir,
1083             $m->[0], $m->[2], $self->{configdir});
1084              
1085             # die Dumper "_run_mime_helper: $archive, $new_name, $destdir", readlink($unpacked), $unpacked;
1086              
1087 0 0 0     0 unless (ref $unpacked or -e $unpacked)
1088             {
1089 0         0 warn("archive=$archive, new_name=$new_name\n");
1090 0         0 die("assert -e '$unpacked'")
1091             }
1092              
1093 0 0       0 return 0 if $self->{no_op};
1094 0 0 0     0 if (ref $unpacked)
    0          
1095             {
1096             # a ref here means, something went wrong.
1097 0         0 $data->{failed} = $h->{fmt_p};
1098 0         0 $data->{error} = $unpacked->{error};
1099 0 0       0 $data->{stderr} = $unpacked->{stderr} if defined $unpacked->{stderr};
1100 0         0 $self->logf($archive => $data);
1101 0         0 $self->{file_count}++;
1102 0         0 $self->{helper_errors}++;
1103             }
1104             elsif (readlink($unpacked)||'' eq $archive)
1105             {
1106             # a symlink backwards means, there is nothing to unpack here. take it as is.
1107 0         0 unlink $unpacked;
1108 0 0       0 rmdir $destdir if $self->{archive_name_as_dir}; # now an empty dir.
1109 0         0 $data->{passed} = $h->{name};
1110 0         0 $data->{input} = $self->loggable_pathname($archive);
1111 0         0 $data->{cmd} = $h->{fmt_p};
1112             {
1113 0         0 local $Data::Dumper::Terse = 1;
  0         0  
1114 0         0 local $Data::Dumper::Indent = 0;
1115 0 0       0 $data->{diag} = Dumper $diag if $diag;
1116             }
1117              
1118 0 0       0 if ($archive =~ m{^\Q$self->{destdir}\E})
1119             {
1120             # if inside, we just flag it done and log it.
1121 0         0 $self->{done}{$archive} = $archive;
1122 0         0 $self->logf($archive => $data);
1123             }
1124             else
1125             {
1126             # if the archive itself was outside destdir,
1127             # we copy it in, flag it done there, and log it here.
1128 0 0       0 if (File::Copy::copy($archive, $unpacked))
1129             {
1130 0         0 $self->{done}{$archive} = $unpacked;
1131 0         0 $self->logf($unpacked => $data);
1132             }
1133             else
1134             {
1135 0         0 $data->{error} = "copy($archive, $unpacked): $!";
1136 0         0 $self->logf($archive => $data);
1137             }
1138             }
1139 0         0 $self->{file_count}++;
1140             }
1141             else
1142             {
1143             # normal case: mime helper placed all
1144             # in a directory (or file) called $unpacked
1145              
1146              
1147 0 0       0 if ($archive =~ m{^\Q$self->{destdir}\E})
1148             {
1149             # to delete it, we should know if it was created during unpack.
1150             }
1151 0         0 $self->{done}{$archive} = $unpacked;
1152 0         0 $data->{cmd} = $h->{fmt_p};
1153 0         0 $data->{unpacked} = $self->loggable_pathname($unpacked);
1154 0         0 $self->logf($archive => $data);
1155 0         0 $self->{file_count}++;
1156 0         0 $self->{inside_archives}++;
1157              
1158 0         0 my $newdestdir = $unpacked;
1159 0 0       0 $newdestdir =~ s{/+[^/]+}{} unless -d $newdestdir; # make sure it is a directory
1160 0 0       0 $newdestdir = $destdir unless $newdestdir =~ m{^\Q$self->{destdir}\E/}; # make sure it does not escape
1161 0 0       0 if ($self->{one_shot})
1162             {
1163 0         0 local $self->{mime_orcish};
1164 0         0 local $self->{mime_helper};
1165              
1166 0         0 $self->unpack($unpacked, $newdestdir);
1167             }
1168             else
1169             {
1170 0         0 $self->unpack($unpacked, $newdestdir);
1171             }
1172 0         0 $self->{progress_tstamp} = time;
1173 0         0 $self->{inside_archives}--;
1174             }
1175             }
1176             }
1177             }
1178             else
1179             {
1180 0         0 $self->logf($archive => { "skipped" => "special file"});
1181 0         0 $self->{file_count}++;
1182             }
1183              
1184 10 50       24 if (--$self->{recursion_level} == 0)
1185             {
1186 0 0       0 if ($self->{log_type} eq 'plain')
1187             {
1188 0         0 for my $m (@missing_unpacker)
1189             {
1190 0         0 $self->log("# missing unpacker: $m\n");
1191             }
1192 0         0 for my $s (sort keys %{$self->{skipped}})
  0         0  
1193             {
1194 0         0 $self->log("# skipped: $s: $self->{skipped}{$s}\n");
1195             }
1196 0 0       0 $self->log("# error: ".join('; ', @{$self->{error}})."\n") if $self->{error};
  0         0  
1197             }
1198             else
1199             {
1200 0         0 my $epilog = {end => scalar localtime, sec => time-$start_time };
1201 0 0       0 $epilog->{skipped} = $self->{skipped} if $self->{skipped};
1202 0 0       0 $epilog->{error}{msg} = $self->{error} if $self->{error}; # just in case some errors were non-fatal.
1203 0 0       0 $epilog->{error}{helper} = $self->{helper_errors} if $self->{helper_errors}; # counting
1204 0 0       0 $epilog->{missing_unpacker} = \@missing_unpacker if @missing_unpacker;
1205 0         0 my $s = $self->{json}->encode($epilog);
1206              
1207 0         0 $s =~ s@^{@\n},@;
1208 0         0 $self->log($s . "\n");
1209             }
1210              
1211 0 0       0 if ($self->{lfp} ne $self->{logfile})
1212             {
1213 0 0       0 close $self->{lfp} or carp "logfile write ($self->{logfile}) failed: $!\n";
1214             }
1215 0         0 delete $self->{lfp};
1216 0         0 delete $self->{lfp_printed};
1217             }
1218              
1219             # FIXME: should return nonzero if we had any unrecoverable errors.
1220 10 50       32 return $self->{error} ? 1 : 0;
1221             }
1222              
1223             # Try a few modes to add to the current permission bits.
1224             # The first mode that succeeds ends the list.
1225             sub _chmod_add
1226             {
1227 5     5   11 my ($self, $file, @modes) = @_;
1228 5 50       28 $file = $1 if $file =~ m{^(.*)$}m;
1229 5         68 my $perm = (stat $file)[2] & 07777;
1230 5         11 for my $m (@modes)
1231             {
1232 5 50       144 last if chmod($perm|$m, $file); # may or may not succeed. Harmless here.
1233             }
1234             }
1235              
1236             =head2 run
1237              
1238             $u->run([argv0, ...], @redir, ... { init => sub ..., in, out, err, watch, every, prog, ... })
1239              
1240             A general purpose fork-exec wrapper, based on IPC::Run. STDIN is closed, unless you specify
1241             an C<< in => >> as described in IPC::Run. STDERR and STDOUT are both printed to
1242             STDOUT, prefixed with 'E: ' and 'O: ' respectively, unless you specify C<< out => >>,
1243             C<< err => >>, or C<< out_err => >> ... for both.
1244              
1245             Using redirection operators in @redir takes precedence over the above in/out/err
1246             redirections. See also L. If you use the options in/out/err, you should
1247             restrict your redirection operators to the forms '<', '0<', '1>', '2>', or '>&' due
1248             to limitations in the precedence logic. Piping via '|' is properly recognized,
1249             but background execution '&' may confuse the precedence logic.
1250              
1251             This C method is completly independent of the rest of File::Unpack. It works both
1252             as a static function and as a method call.
1253             It is used internally by C, but is exported to be of use elsewhere.
1254              
1255             Init is run after construction of redirects. Calling chdir() in init thus has no
1256             effect on redirects with relative paths.
1257              
1258             Return value in scalar context is the first nonzero result code, if any. In list context
1259             all return values are returned.
1260             =cut
1261              
1262             sub run
1263             {
1264 0 0   0 1 0 shift if ref $_[0] ne 'ARRAY'; # toss $self object handle.
1265 0         0 my (@cmd) = @_;
1266 0         0 my $opt;
1267 0 0       0 $opt = pop @cmd if ref $cmd[-1] eq 'HASH';
1268              
1269 0         0 my $cmdname = $cmd[0][0]; $cmdname =~ s{^.*/}{};
  0         0  
1270              
1271             # run the command with
1272             # - STDIN closed, unless you specify an { in => ... }
1273             # - STDERR and STDOUT printed prefixed with 'E: ', 'O: ' to STDOUT,
1274             # unless you specify out =>, err =>, or out_err => ... for both.
1275 0   0     0 $opt->{in} ||= \undef;
1276 0   0     0 $opt->{out} ||= $opt->{out_err};
1277 0   0     0 $opt->{err} ||= $opt->{out_err};
1278 0   0 0   0 $opt->{out} ||= sub { print "O: ($cmdname) @_\n"; };
  0         0  
1279 0   0 0   0 $opt->{err} ||= sub { print "E: ($cmdname) @_\n"; };
  0         0  
1280              
1281 0         0 my $has_i_redir = 0;
1282 0         0 my $has_o_redir = 0;
1283 0         0 my $has_e_redir = 0;
1284              
1285             ## The ugly truth is, there might be multiple commands with pipes.
1286             ## We need to provide all of them with the proper redirects.
1287             ## A command that pipes somewhere else, has_o_redir outbound through the pipe.
1288             ## A command that is piped into, has_i_redir inbound from the pipe.
1289 0         0 my @run = ();
1290              
1291              
1292 0         0 for my $c (@cmd)
1293             {
1294 0 0       0 if (ref $c)
1295             {
1296 0         0 push @run, $c;
1297              
1298             # put init early, so that it is run, before any IO redirects access relative paths.
1299 0 0       0 push @run, init => $opt->{init} if $opt->{init};
1300 0         0 next; # don't look into argvs, but
1301             }
1302             # look only into redirection operators
1303 0 0       0 $has_i_redir++ if $c =~ m{^0?<};
1304 0 0       0 $has_o_redir++ if $c =~ m{^1?>};
1305 0 0       0 $has_e_redir++ if $c =~ m{^(?:2>|>&$)};
1306 0 0       0 if ($c eq '|')
1307             {
1308 0 0       0 push @run, '0<', $opt->{in} unless $has_i_redir;
1309 0         0 $has_i_redir = 'piped';
1310 0 0       0 push @run, "2>", $opt->{err} unless $has_e_redir;
1311 0         0 $has_e_redir = $has_o_redir = 0;
1312             }
1313 0         0 push @run, $c; # $1 if $c =~ m{^(.*)$}s; # brute force untaint
1314             }
1315              
1316 0 0       0 push @run, '0<', $opt->{in} unless $has_i_redir;
1317 0 0       0 push @run, "1>", $opt->{out} unless $has_o_redir;
1318 0 0       0 push @run, "2>", $opt->{err} unless $has_e_redir;
1319              
1320             # die Dumper \@run if $cmd[0][0] eq '/usr/bin/rpm2cpio';
1321              
1322 0         0 my $t;
1323 0 0       0 $t = IPC::Run::timer($opt->{every}-0.6) if $opt->{every};
1324 0 0       0 push @run, $t if $t;
1325              
1326 0 0       0 $run[0][0] = $1 if $run[0][0] =~ m{^(.*)$}s;
1327 0 0       0 push @run, debug => $opt->{debug} if $opt->{debug};
1328 0         0 my $h = eval { IPC::Run::start @run; };
  0         0  
1329 0 0       0 return wantarray ? (undef, $@) : undef unless $h;
    0          
1330              
1331 0         0 while ($h->pumpable)
1332             {
1333             # eval {} guards against 'process ended prematurely' errors.
1334             # This happens on very fast commands, despite pumpable().
1335 0         0 eval { $h->pump };
  0         0  
1336 0 0 0     0 if ($t && $t->is_expired)
1337             {
1338 0         0 $t->{has_fired}++;
1339 0         0 $opt->{prog}->($h, $opt);
1340 0         0 $t->start($opt->{every});
1341             }
1342             }
1343 0         0 $h->finish;
1344 0         0 $opt->{finished} = 1;
1345              
1346             ## call it once more, to get the 100% printout, or somthing else...
1347 0 0       0 $opt->{prog}->($h, $opt) if $t->{has_fired};
1348              
1349 0 0       0 return wantarray ? $h->full_results : $h->result;
1350             }
1351              
1352             =head2 fmt_run_shellcmd
1353              
1354             File::Unpack::fmt_run_shellcmd( $m->{argvv} )
1355              
1356             Static function to pretty print the return value $m of method find_mime_helper();
1357             It formats a command array used with run() as a properly escaped shell command string.
1358              
1359             =cut
1360              
1361             sub _my_shell_quote
1362             {
1363 413     413   586 my @a = @_;
1364 413         320 my $sub;
1365 413 50 33     1434 if (@a and defined $a[0])
1366             {
1367 413 100       874 $sub = '\\&_locate_tar' if $a[0] eq \&_locate_tar;
1368 413 100       894 $sub = '\\&_locate_cpio_i' if $a[0] eq \&_locate_cpio_i;
1369             }
1370              
1371 413 100       688 if ($sub)
1372             {
1373 49         47 shift @a;
1374 49         121 return "$sub " . shell_quote(@a);
1375             }
1376 364         666 return shell_quote(@a);
1377             }
1378              
1379             sub fmt_run_shellcmd
1380             {
1381 189     189 1 260 my @a = @_;
1382 189 50       379 @a = @{$a[0]{argvv}} if ref $a[0] eq 'HASH';
  0         0  
1383 189         202 my @r = ();
1384 189         265 for my $a (@a)
1385             {
1386 413 100       6901 push @r, ref($a) ? '('._my_shell_quote(@$a).')' : _my_shell_quote($a);
1387             }
1388 189         7017 my $r = join ' ', @r;
1389 189 100       657 $r =~ s{^\((.*)\)$}{$1} unless $#a; # parenthesis around a single cmd are unneeded.
1390 189         857 return $r;
1391             }
1392              
1393             ## not a method, officially.
1394             #
1395             ## Chdir in and out of a jail is done here, as IPC::Run::run({init}->())
1396             ## has bad timing for our purposes.
1397             #
1398             ## fastjar extracts happily to ../../..
1399             ## this happens in cups-1.2.1/scripting/java/cups.jar
1400             #
1401             ## FIXME:
1402             # "/tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/scripting/java/cups.jar":
1403             # {"cmd":"/usr/bin/unzip -P no_pw -q -o '%(src)s'",
1404             # "unpacked":"/tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/_Knw_"}
1405             # Two issues:
1406             # a) _run_mime_helper in /tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4
1407             # should be /tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/scripting/java
1408             # b) _Knw_ should never appear in the end result ...
1409             #
1410              
1411             sub _run_mime_helper
1412             {
1413 0     0   0 my ($self, $h, @argv) = @_;
1414              
1415 0         0 for my $i (0..$#argv)
1416             {
1417 0 0       0 $argv[$i] = $1 if $argv[$i] =~ m{^(.*)$}s; # brute force untaint
1418             }
1419              
1420 0         0 my $destdir = $argv[2];
1421 0   0     0 my $dot_dot_safeguard = $self->{dot_dot_safeguard}||0;
1422 0 0       0 $dot_dot_safeguard = 2 if $dot_dot_safeguard < 2;
1423              
1424 0         0 my $jail_base = '/dev/null';
1425 0         0 my $jail = $jail_base;
1426 0 0       0 unless ($self->{no_op})
1427             {
1428 0         0 mkpath($destdir);
1429 0         0 $jail_base = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir);
1430 0         0 $jail = $jail_base . ("/_" x $dot_dot_safeguard);
1431 0         0 mkpath($jail);
1432             }
1433              
1434 0         0 my $args =
1435             {
1436             src => $argv[0], # abs_path() - but not symlink resolved, so that the unpacker sees 'our' name
1437             destfile => $argv[1], # filename() - a suggested name, simply based on src, in case the unpacker needs it.
1438             destdir => $jail, # abs_path() - for now...
1439             mime => $argv[3],
1440             descr => $argv[4], # mime_descr
1441             configdir => $argv[5] # abs_path()
1442             };
1443 0         0 $args->{lsrc} = Cwd::realpath($args->{src}); # symlinks resolved; use this with a stupid unpacker like 'upx'
1444 0 0       0 die "src must be an abs_path." unless $args->{src} =~ m{^/};
1445            
1446 0         0 my @cmd;
1447 0         0 for my $a (@{$h->{argvv}})
  0         0  
1448             {
1449 0 0       0 if (ref $a)
1450             {
1451 0         0 my @c = ();
1452 0         0 for my $b (@$a)
1453             {
1454 0         0 push @c, _subst_args($b, $args);
1455             }
1456 0         0 push @cmd, [@c];
1457             }
1458             else
1459             {
1460 0         0 push @cmd, _subst_args($a, $args);
1461             }
1462             }
1463              
1464 0 0       0 if ($self->{no_op})
1465             {
1466 0         0 print fmt_run_shellcmd(@cmd) . "\n";
1467 0         0 return undef;
1468             }
1469              
1470 0 0       0 print STDERR "_run_mime_helper in $destdir: " . fmt_run_shellcmd(@cmd) . "\n" if $self->{verbose} > 1;
1471              
1472 0 0       0 my $cwd = getcwd() or carp "cannot fetch initial working directory, getcwd: $!";
1473 0 0       0 $cwd = $1 if $cwd =~ m{^(.*)$}s; # brute force untaint. Whereever you go, there you are.
1474 0 0       0 chdir $jail or die "chdir '$jail'";
1475 0 0       0 chmod 0, $jail_base if $self->{jail_chmod0};
1476             # Now have fully initialzed in the parent before forking.
1477             # This is needed, as all redirect operators are executed in the parent before forking.
1478             # init => sub { ... } is no longer needed. sigh, I really wanted to the init sub for the chdir.
1479             # But hey, mkpath() and rmtree() change the cwd so often, and restore it, so why shouldn't we?
1480              
1481              
1482 0         0 my $run_error = undef; # we capture the first error line for the logfile.
1483             my @r = $self->run(@cmd,
1484             {
1485             debug => ($self->{verbose} > 2) ? $self->{verbose} - 2 : 0,
1486             watch => $args->{src}, every => 5, fu_obj => $self, mime_helper => $h,
1487 0 0   0   0 err => sub { print "E: @_\n" if $self->{verbose}; $run_error = "@_" unless length $run_error },
  0 0       0  
1488             prog => sub
1489             {
1490 0     0   0 $_[1]{tick}++;
1491 0         0 my $name = $_[1]{watch}; $name =~ s{.*/}{};
  0         0  
1492 0 0       0 if ($_[1]{finished})
    0          
1493             {
1494 0 0       0 printf "T: %s (%s, done)\n", $name, _unit_bytes(-s $_[1]{watch},1)
1495             if $self->{verbose};
1496             }
1497             elsif (my $p = _children_fuser($_[1]{watch}, POSIX::getpid()))
1498             {
1499 0         0 _fuser_offset($p);
1500             # we may get muliple process with multiple filedescriptors.
1501             # select the one that moves fastest.
1502 0         0 my $largest_diff = -1;
1503 0         0 for my $pid (keys %$p)
1504             {
1505 0         0 for my $fd (keys %{$p->{$pid}{fd}})
  0         0  
1506             {
1507 0   0     0 my $diff = ($p->{$pid}{fd}{$fd}{pos}||0) - ($_[1]{fuser}{$pid}{fd}{$fd}{pos}||0);
      0        
1508 0 0       0 if ($diff > $largest_diff)
1509             {
1510 0         0 $largest_diff = $diff;
1511 0         0 $p->{fastest_fd} = $p->{$pid}{fd}{$fd};
1512             }
1513             }
1514             }
1515             # Stick with the one we had before, if none moves.
1516 0 0       0 $p->{fastest_fd} = $_[1]{fuser}{fastest_fd} if $largest_diff <= 0;
1517 0         0 $_[1]{fuser} = $p;
1518 0   0     0 my $off = $p->{fastest_fd}{pos}||0;
1519 0   0     0 my $tot = $p->{fastest_fd}{size}||(-s $_[1]{watch})||1;
1520 0 0       0 printf "T: %s (%s, %.1f%%)\n", $name, _unit_bytes($off,1), ($off*100)/$tot
1521             if $self->{verbose};
1522             }
1523             else
1524             {
1525 0 0       0 print "T: $name tick_tick $_[1]{tick}\n"
1526             if $self->{verbose};
1527             }
1528             },
1529 0 0       0 });
1530            
1531             # system("ls -la $jail_base/..; find $jail_base");
1532             # print STDERR Dumper \@r;
1533              
1534 0 0       0 chmod 0700, $jail_base if $self->{jail_chmod0};
1535 0 0       0 chdir $cwd or die "cannot chdir back to cwd: chdir($cwd): $!";
1536 0         0 my @nonzero = grep { $_ } @r;
  0         0  
1537              
1538             # TODO: handle failure
1539             # - remove all,
1540             # - retry with a fallback helper , if any.
1541 0 0 0     0 printf STDERR "Non-Zero return value: $nonzero[0]: %s\n", fmt_run_shellcmd(@cmd)
1542             if $nonzero[0] and $self->{verbose};
1543              
1544             # FIXME: fallback helper not implemented
1545             # t/data/pdftxt-a.txt is really plain/text altthough it begins with "PDF-1.4..." and
1546             # thus fools the mime-type tests.
1547             # should run other helpers, and finally 'strings -' as a trivial fallback.
1548 0 0       0 if ($nonzero[0])
1549             {
1550 0         0 rmtree($jail_base); # empty or has unusable contents now.
1551             ## FIXME: we should at least copy in the original file as is...
1552 0         0 return { error => "nonzero retval:\n " . Dumper(\@r), stderr => $run_error };
1553             }
1554              
1555             # loop through all _: if it only contains one item , replace it with this item,
1556             # be it a file or dir. This uses $jail_tmp, an unused pathname.
1557 0         0 my $jail_tmp = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir);
1558 0         0 rmdir $jail_tmp;
1559              
1560             # if only one file in $jail, move it up, and return
1561             # the filename instead of the dirname here.
1562             # (We don't search for $args->{destfile}, it is the unpackers choice to use it or not.)
1563 0         0 my $wanted_name;
1564 0         0 for (my $i = 0; $i <= $dot_dot_safeguard; $i++)
1565             {
1566 0 0       0 opendir DIR, $jail_base or last;
1567 0 0       0 my @found = grep { $_ ne '.' and $_ ne '..' } readdir DIR;
  0         0  
1568 0         0 closedir DIR;
1569 0         0 my $found0;
1570 0 0 0     0 $found0 = $1 if defined($found[0]) and $found[0] =~ m{^(.*)$}s; # brute force untaint
1571 0 0       0 print STDERR "dot_dot_safeguard=$dot_dot_safeguard, i=$i, found=$found0\n" if $self->{verbose} > 2;
1572 0 0       0 unless (@found)
1573             {
1574 0         0 rmdir $jail_base;
1575 0         0 my $name;
1576 0 0       0 $name = $1 if $args->{src} =~ m{/([^/]+)$};
1577 0 0       0 print STDERR "oops(i=$i): nothing unpacked?? Adding $name as is.\n" if $self->{verbose};
1578 0         0 return { error => "nothing unpacked" };
1579             }
1580 0 0       0 last if scalar @found != 1;
1581 0 0       0 $wanted_name = $found0 if $i == $dot_dot_safeguard;
1582 0 0       0 last unless -d $jail_base . "/" . $found0;
1583             # assert writable dirs. needed for ksh/ast-base.2012-08-01.tar.bz2/src/cmd/pax/data/ro.dat
1584 0         0 chmod(0755, $jail_base . "/" . $found0);
1585 0 0       0 rename $jail_base, $jail_tmp or die("4:$i rename($jail_base, $jail_tmp) failed: $!");
1586 0 0       0 rename $jail_tmp . "/" . $found0, $jail_base or die("5:$i rename($jail_tmp .'/'. $found0, $jail_base) failed: $!");
1587 0 0       0 rmdir $jail_tmp or last;
1588             }
1589              
1590             ## this message is broken.
1591             # print STDERR "Hmmm, unpacker did not use destname: $args->{destfile}\n" if $self->{verbose} and !defined $wanted_name;
1592              
1593             # say nothing, if $args->{destfile} is equal to or a prefix of $wanted_name.
1594 0 0 0     0 print STDERR "Hmmm, unpacker saw destname: $args->{destfile}, but used destname: $wanted_name\n"
      0        
1595             if $self->{verbose} > 1 and defined($wanted_name) and $wanted_name !~ m{^\Q$args->{destfile}};
1596              
1597 0 0       0 $wanted_name = $args->{destfile} unless defined $wanted_name;
1598 0         0 my $wanted_path;
1599 0 0       0 $wanted_path = _unused_pathname($destdir, $wanted_name) if defined $wanted_name;
1600 0         0 my $unpacked = $jail_base;
1601 0 0 0     0 if (defined($wanted_name) and !-e $wanted_path)
1602             {
1603 0 0       0 if (-d $jail_base)
1604             {
1605             ## find out, if the unpacker created exactly one file or one directory,
1606             ## in this case we can move one level further.
1607 0         0 opendir DIR, $jail_base;
1608 0 0       0 my @found = grep { $_ ne '.' and $_ ne '..' } readdir DIR;
  0         0  
1609 0         0 closedir DIR;
1610 0         0 my $found0;
1611 0 0 0     0 $found0 = $1 if defined($found[0]) and $found[0] =~ m{^(.*)$}s; # brute force untaint
1612              
1613 0 0 0     0 if ($#found == 0 and $found0 eq $wanted_name)
1614             {
1615 0 0       0 rename "$jail_base/$found0", $wanted_path or die "1 rename($jail_base/$found0, $wanted_path) failed: $!";
1616 0         0 rmdir $jail_base;
1617             }
1618             else
1619             {
1620 0 0       0 rename $jail_base, $wanted_path or die "2 rename($jail_base, $wanted_path) failed: $!";
1621             }
1622             }
1623             else
1624             {
1625 0 0       0 rename $jail_base, $wanted_path or die "3 rename($jail_base, $wanted_path) failed: $!";
1626             }
1627 0         0 $unpacked = $wanted_path;
1628             }
1629              
1630             # catch some patholigical cases.
1631 0 0 0     0 if (-f $unpacked and !-l $unpacked)
1632             {
1633 0 0       0 if (!-s $unpacked)
    0          
1634             {
1635 0 0       0 print STDERR "Ooops, only one empty file -> symlink back\n" if $self->{verbose};
1636 0         0 unlink $unpacked;
1637 0         0 symlink $args->{src}, $unpacked;
1638             }
1639             elsif (-s $unpacked eq (my $s = -s $args->{src}))
1640             {
1641 0 0       0 print STDERR "Hmm, same size ($s bytes) after unpacking???\n" if $self->{verbose};
1642             ## xz -dc -f behaves like cat, if called on an unknown file.
1643             ## Compare the files. If they are identical, stop this:
1644 0 0       0 if (File::Compare::cmp($args->{src}, $unpacked) == 0)
1645             {
1646 0 0       0 print STDERR "Oops, identical -> symlink back\n" if $self->{verbose};
1647 0         0 unlink $unpacked;
1648 0         0 symlink $args->{src}, $unpacked;
1649             }
1650             }
1651             }
1652              
1653 0         0 my $diag = undef;
1654 0 0       0 $diag->{stderr} = $run_error if defined $run_error;
1655 0         0 return ($unpacked, $diag);
1656             }
1657              
1658             sub _unused_pathname
1659             {
1660 0     0   0 my ($destdir, $wanted_name) = @_;
1661 0         0 my $wanted_path = $destdir . "/" . $wanted_name;
1662              
1663 0 0       0 if (-e $wanted_path)
1664             {
1665             ## try to come up with a very similar name, just different suffix.
1666             ## be compatible with path name shortening in unpack()
1667 0         0 my $test_path = $wanted_path . '._';
1668 0         0 for my $i ('', 1..999)
1669             {
1670             # All our mime detectors work on file contents, rather than on suffixes.
1671             # Thus messing with the suffix should be okay here.
1672 0 0       0 unless (-e $test_path.$i)
1673             {
1674 0         0 $wanted_path = $test_path.$i;
1675 0         0 last;
1676             }
1677             }
1678             }
1679 0 0       0 die "_unused_pathname failed: last attempt $wanted_path\n" if -e $wanted_path;
1680 0         0 return $wanted_path;
1681             }
1682              
1683              
1684             sub _children_fuser
1685             {
1686 0     0   0 my ($file, $ppid) = @_;
1687 0   0     0 $ppid ||= 1;
1688 0         0 $file = Cwd::abs_path($file);
1689              
1690 0 0       0 opendir DIR, "/proc" or die "opendir /proc failed: $!\n";
1691 0         0 my %p = map { $_ => {} } grep { /^\d+$/ } readdir DIR;
  0         0  
  0         0  
1692 0         0 closedir DIR;
1693              
1694             # get all procs, and their parent pids
1695 0         0 for my $p (keys %p)
1696             {
1697 0 0       0 if (open IN, "<", "/proc/$p/stat")
1698             {
1699             # don't care if open fails. the process may have exited.
1700 0         0 my $text = join '', ;
1701 0         0 close IN;
1702 0 0       0 if ($text =~ m{\((.*)\)\s+(\w)\s+(\d+)}s)
1703             {
1704 0         0 $p{$p}{cmd} = $1;
1705 0         0 $p{$p}{state} = $2;
1706 0         0 $p{$p}{ppid} = $3;
1707             }
1708             }
1709             }
1710              
1711             # Weed out those who are not in our family
1712 0 0       0 if ($ppid > 1)
1713             {
1714 0         0 for my $p (keys %p)
1715             {
1716 0         0 my $family = 0;
1717 0         0 my $pid = $p;
1718 0         0 while ($pid)
1719             {
1720             # Those that have ppid==1 may also belong to our family.
1721             # We never know.
1722 0 0 0     0 if ($pid == $ppid or $pid == 1)
1723             {
1724 0         0 $family = 1;
1725 0         0 last;
1726             }
1727 0 0       0 last unless $p{$pid};
1728 0         0 $pid = $p{$pid}{ppid};
1729             }
1730 0 0       0 delete $p{$p} unless $family;
1731             }
1732             }
1733              
1734 0         0 my %o; # matching open files are recorded here
1735              
1736             # see what files they have open
1737 0         0 for my $p (keys %p)
1738             {
1739 0 0       0 if (opendir DIR, "/proc/$p/fd")
1740             {
1741 0         0 my @l = grep { /^\d+$/ } readdir DIR;
  0         0  
1742 0         0 closedir DIR;
1743 0         0 for my $l (@l)
1744             {
1745 0         0 my $r = readlink("/proc/$p/fd/$l");
1746 0 0       0 next unless defined $r;
1747             # warn "$p, $l, $r\n";
1748 0 0       0 if ($r eq $file)
1749             {
1750 0   0     0 $o{$p}{cmd} ||= $p{$p}{cmd};
1751 0         0 $o{$p}{fd}{$l} = { file => $file };
1752             }
1753             }
1754             }
1755             }
1756 0         0 return \%o;
1757             }
1758              
1759             # see if we can read the file offset of a file descriptor, and the size of its file.
1760             sub _fuser_offset
1761             {
1762 0     0   0 my ($p) = @_;
1763 0         0 for my $pid (keys %$p)
1764             {
1765 0         0 for my $fd (keys %{$p->{$pid}{fd}})
  0         0  
1766             {
1767 0 0       0 if (open IN, "/proc/$pid/fdinfo/$fd")
1768             {
1769 0         0 while (defined (my $line = ))
1770             {
1771 0         0 chomp $line;
1772 0 0       0 $p->{$pid}{fd}{$fd}{$1} = $2 if $line =~ m{^(\w+):\s+(.*)\b};
1773             }
1774             }
1775 0         0 close IN;
1776 0         0 $p->{$pid}{fd}{$fd}{size} = -s $p->{$pid}{fd}{$fd}{file};
1777             }
1778             }
1779             }
1780              
1781              
1782             sub _prep_configdir
1783             {
1784 0     0   0 my ($self) = @_;
1785 0         0 my $dir = "/tmp/file_unpack_$$/";
1786 0         0 mkpath($dir);
1787 0         0 my $j = $self->{json}->allow_nonref();
1788              
1789 0         0 open my $SH, ">", "$dir/config.sh";
1790 0         0 open my $JS, ">", "$dir/config.js";
1791              
1792 0         0 print $JS "{\n";
1793              
1794 0         0 for my $group ('', 'minfree', 'exclude')
1795             {
1796 0 0       0 my $h_ref = ($group eq '') ? $self : $self->{$group};
1797 0         0 for my $k (sort keys %$h_ref)
1798             {
1799 0         0 my $val = $h_ref->{$k};
1800 0 0       0 next if $k eq 'recursion_level';
1801 0 0       0 next if ref $val; # we only take scalars.
1802 0 0       0 my $name = ($group eq '') ? $k : "${group}_$k";
1803 0         0 printf $SH "%s=%s\n", shell_quote(uc "fu_$name"), shell_quote($val);
1804 0         0 printf $JS "%s:%s,\n", $j->encode($name), $j->encode($val);
1805             }
1806             }
1807              
1808 0         0 print $SH "FU_VERSION=$VERSION\n";
1809 0         0 print $JS qq["fu_version":"$VERSION"\n}\n];
1810              
1811 0         0 close $SH;
1812 0         0 close $JS;
1813 0         0 return $dir;
1814             }
1815              
1816              
1817             =head2 mime_helper_dir mime_helper
1818              
1819             $u->mime_helper_dir($dir, ...)
1820             $u->mime_helper($mime_name, $suffix_regexp, \@argv, @redir, ...)
1821              
1822             Registers one or more directories where external MIME helper programs are found.
1823             Helpers plugins are shellscripts that server as specialized MIME type handlers for unpacking.
1824             A list of helpers comes builtin interfacing most well-known archivers. This list can be appended to using the mime_helper_dir() or mime_helper() methods.
1825             Multiple directories can be registered, They are searched in reverse order, i.e.
1826             last added takes precedence. Any external MIME helper takes precedence over built-in code.
1827              
1828             The suffix_regexp is used to derive the destination name from the source name.
1829             It is not used for selecting helpers.
1830              
1831             When collecting external helper scripts via C, there is no C. Instead,
1832             external helper scripts can explicitly create a toplevel directory with the desired name.
1833              
1834             Helpers are mapped to MIME types by their mime_name. The name can be constructed
1835             from the MIME type by replacing the '/' with a '=' character, and by using the
1836             word 'ANY' as a wildcard component. The '=' character is interpreted as an
1837             implicit '=ANY+' if needed.
1838              
1839             Examples:
1840              
1841             Mimetype helper names tried from top to bottom
1842             -----------------------------------------------------------------
1843             image/png image=png
1844             image=ANY
1845             image
1846             ANY=png
1847             ANY=ANY
1848             ANY
1849              
1850             application/vnd.oasis+zip application=vnd.oasis+zip
1851             application=ANY+zip
1852             application=ANYzip
1853             application=zip
1854             application=ANY
1855             ...
1856            
1857             A trailing '=ANY' is implicit, as shown by these examples.
1858             The rules for precedence are this:
1859              
1860             =over
1861              
1862             =item *
1863              
1864             Search in the latest directory is exhaused first, then the previously added directory is considered in turn,
1865             up to all directories have been traversed, or until a matching helper is found.
1866            
1867             =item *
1868              
1869             A matching name with wildcards has lower precedence than a matching name without.
1870              
1871             =item *
1872              
1873             A wildcard before the '=' sign lowers precedence more than one after it.
1874              
1875             =back
1876              
1877             The mapping takes place when C is called. Adding helper scripts to a directory
1878             afterwards has no effect. C does not do any implicit expansions. Call it
1879             multiple times with the same helper command and different names if needed.
1880             The default argument list is "%(src)s %(destfile)s %(destdir)s %(mime)s %(descr)s %(configdir)s" --
1881             this is applied, if no args are given and no redirections are given. See also C for more semantics and how a helper should behave.
1882              
1883             Both methods return an ARRAY-ref of HASHes describing all known (old and newly added) mime helpers.
1884              
1885             =cut
1886             my @def_mime_helper_fmt = qw(%(src)s %(destfile)s %(destdir)s %(mime)s %(descr)s %(configdir)s);
1887              
1888             sub _subst_args
1889             {
1890 0     0   0 my $f = Text::Sprintf::Named->new({fmt => $_[0]});
1891 0         0 return $f->format({args => $_[1]});
1892             }
1893              
1894             sub mime_helper
1895             {
1896 189     189 1 354 my ($self, $name, $suffix_re, @args) = @_;
1897 189 50       321 @args = ($name) unless @args;
1898 189 50       342 @args = ([@args]) unless ref $args[0];
1899 189 50 66     148 push @{$args[0]}, @def_mime_helper_fmt unless $#{$args[0]} or defined $args[1];
  0         0  
  189         538  
1900              
1901             # cut away the path prefix from name. And use / instead of = in the mime name.
1902 189         1218 $name =~ s{(.*/)?(.*?)=(.*?)$}{$2=$3};
1903              
1904 189 50       499 unless ($name =~ m{[/=]})
1905             {
1906 0 0       0 print STDERR "mime_helper '$name' needs a '=' or '/'.\n" if $self->{verbose};
1907 0         0 return $self->{mime_helper};
1908             }
1909              
1910 189         318 my $pat = "^\Q$name\E\$";
1911 189         382 $pat =~ s{\\=}{/(?:x-|ANY\\+)?};
1912 189         205 $pat =~ s{\\%}{ANY}g;
1913 189         174 $pat =~ s{^\^ANY}{};
1914 189         161 $pat =~ s{ANY\$$}{};
1915 189         331 $pat =~ s{ANY}{\\b\[\^\/\]+\\b}g;
1916 189         173 unshift @{$self->{mime_helper}},
  189         429  
1917             {
1918             name => $name, pat => $pat, suffix_re => $suffix_re,
1919             fmt_p => fmt_run_shellcmd(@args), argvv => \@args
1920             };
1921              
1922 189         258 delete $self->{mime_orcish}; # to be rebuilt in find_mime_helper()
1923              
1924 189         508 return $self->{mime_helper};
1925             }
1926              
1927             =head2 list
1928              
1929             Returns an ARRAY of preformatted patterns and MIME helpers.
1930              
1931             Example:
1932              
1933             printf @$_ for $u->list();
1934              
1935             =cut
1936              
1937             sub list
1938             {
1939 1     1 1 4 my ($self) = @_;
1940              
1941 1         22 my $width = 10;
1942 1         1 for my $m (@{$self->{mime_helper}})
  1         3  
1943             {
1944 27 100       43 $width = length($m->{pat}) if length($m->{pat}) > $width;
1945             }
1946              
1947 1         1 my @r;
1948 1         1 for my $m (@{$self->{mime_helper}})
  1         7  
1949             {
1950 27         49 push @r, [ "%-${width}s %s\n", $m->{pat}, $m->{fmt_p} ];
1951             }
1952 1         4 return @r;
1953             }
1954              
1955             sub mime_helper_dir
1956             {
1957 0     0 1 0 my ($self, @dirs) = @_;
1958              
1959 0         0 for my $d (@dirs)
1960             {
1961 0         0 my %h;
1962 0 0       0 if (opendir DIR, $d)
1963             {
1964 0         0 %h = map { $_ => { a => "$d/$_" } } grep { -f "$d/$_" } readdir DIR;
  0         0  
  0         0  
1965 0         0 closedir DIR;
1966             }
1967             else
1968             {
1969 0         0 carp "Cannot opendir $d: $!, skipped\n";
1970             }
1971              
1972             # add =ANY suffix, if missing
1973 0         0 for my $h (keys %h)
1974             {
1975 0 0       0 if ($h !~ m{[/=]})
1976             {
1977 0         0 my $h2 = $h . "=ANY";
1978 0 0       0 $h{$h2} = { %{$h{$h}} } unless defined $h{$h2};
  0         0  
1979             }
1980             }
1981              
1982             # not needed, this is implicit in mime_helper()/$pat
1983             #
1984             # # add expansion of = to =ANY+, if missing
1985             # for my $h (keys %h)
1986             # {
1987             # next if $h =~ m{=ANY+};
1988             # my $h2 = $h; $h2 =~ s{=}{=ANY+};
1989             # $h{$h2} = $h{$h} unless defined $h{$h2};
1990             # }
1991              
1992             # calculate priorities
1993 0         0 for my $h (keys %h)
1994             {
1995 0         0 my $n = 1000000;
1996 0         0 my $p = 1000;
1997 0         0 while ($h =~ m{(ANY|=)}g)
1998             {
1999 0 0       0 if ($1 eq '=')
2000             {
2001 0         0 $n = 1000;
2002             }
2003             else
2004             {
2005 0         0 $p += $n;
2006             }
2007             }
2008             # longer length has prio over shorter length. Hmm, this is ineffective, isnt it?
2009 0         0 $h{$h}{p} = $p - length($h);
2010             }
2011              
2012             # Now push them, sorted by prio.
2013             # Smaller prio_number is better. Later addition is prefered.
2014 0         0 for my $h (sort { $h{$b}{p} <=> $h{$a}{p} } keys %h)
  0         0  
2015             {
2016             # do not ruin the original name by resolving symlinks and such.
2017 0         0 $self->mime_helper($h, undef, [Cwd::fast_abs_path($h{$h}{a})]);
2018             }
2019             }
2020 0         0 return $self->{mime_helper};
2021             }
2022              
2023             =head2 find_mime_helper
2024              
2025             $u->find_mime_helper($mimetype)
2026              
2027             Returns a MIME helper suitable for unpacking the given $mimetype.
2028             If called in list context, a second return value indicates which
2029             mime helpers would be suitable, but could not be found in the system.
2030              
2031             =cut
2032              
2033             sub find_mime_helper
2034             {
2035 0     0 1 0 my ($self, $mimetype) = @_;
2036 0 0       0 $mimetype = $mimetype->[0] if ref $mimetype eq 'ARRAY';
2037              
2038 0 0 0     0 return $self->{mime_orcish}{$mimetype}
2039             if defined $self->{mime_orcish}{$mimetype} and
2040             -f $self->{mime_orcish}{$mimetype}{argvv}[0][0];
2041            
2042 0         0 my $r = undef;
2043 0         0 for my $h (@{$self->{mime_helper}})
  0         0  
2044             {
2045 0 0       0 if ($mimetype =~ m{$h->{pat}})
2046             {
2047 0         0 $self->_finalize_argvv($h);
2048 0 0       0 unless (-f $h->{argvv}[0][0])
2049             {
2050 0         0 push @{$r->{missing}}, $h->{argvv}[0][0];
  0         0  
2051 0         0 next;
2052             }
2053 0         0 $self->{mime_orcish}{$mimetype} = $h;
2054 0 0       0 return wantarray ? ($h, $r) : $h;
2055             }
2056             }
2057 0 0       0 return wantarray ? (undef, $r) : undef;
2058             }
2059              
2060             #
2061             # _finalize_argvv() executes a sub in 3 places:
2062             # The argvv ptr itself can be a sub:
2063             # this should return an array, where the
2064             # first element is the command (as an array-ref) and subsequent elements are
2065             # redirects. See run() for details.
2066             # One of the argvv elements is a sub:
2067             # this should return the command as an array-ref, if it is argvv[0],
2068             # or return one or more redirects.
2069             # One element of argvv[0] is a sub:
2070             # this should return one or more command names, options, arguments,
2071             #
2072             # Tricky part of the implementation is the in-place array expansion while iterating.
2073             #
2074             sub _finalize_argvv
2075             {
2076 0     0   0 my ($self, $h) = @_;
2077              
2078 0         0 my $update_fmt_p = 0;
2079 0 0       0 if (ref $h->{argvv} eq 'CODE')
2080             {
2081 0         0 $h->{argvv} = [ $h->{argvv}->($self) ];
2082 0         0 $update_fmt_p++;
2083             }
2084              
2085             # If any part of LIST is an array, "foreach" will get very confused if you add or
2086             # remove elements within the loop body, for example with "splice". So don't do
2087             # that.
2088             # Sigh, we want do do exactly that, a sub may replace itself by any number of elements. Use booring C-style loop.
2089 0         0 my $last = $#{$h->{argvv}};
  0         0  
2090 0         0 for (my $idx = 0; $idx <= $last; $idx++)
2091             {
2092 0 0       0 if (ref $h->{argvv}[$idx] eq 'CODE')
2093             {
2094 0         0 my @r = $h->{argvv}[$idx]($self);
2095 0         0 splice @{$h->{argvv}}, $idx, 1, @r;
  0         0  
2096 0         0 $idx += $#r;
2097 0         0 $last +=$#r;
2098 0         0 $update_fmt_p++;
2099             }
2100             }
2101 0         0 $last = $#{$h->{argvv}};
  0         0  
2102 0         0 for (my $idx = 0; $idx <= $last; $idx++)
2103             {
2104 0 0       0 next unless ref $h->{argvv}[$idx] eq 'ARRAY';
2105 0         0 my $last1 = $#{$h->{argvv}[$idx]};
  0         0  
2106 0         0 for (my $idx1 = 0; $idx1 <= $last1; $idx1++)
2107             {
2108 0 0       0 if (ref $h->{argvv}[$idx][$idx1] eq 'CODE')
2109             {
2110 0         0 my @r = $h->{argvv}[$idx][$idx1]->($self);
2111 0         0 splice @{$h->{argvv}[$idx]}, $idx1, 1, @r;
  0         0  
2112 0         0 $idx1 += $#r;
2113 0         0 $last1 +=$#r;
2114 0         0 $update_fmt_p++;
2115             }
2116             }
2117             }
2118              
2119 0 0       0 $h->{fmt_p} = fmt_run_shellcmd($h) if $update_fmt_p;
2120             }
2121              
2122             =head2 minfree
2123              
2124             $u->minfree(factor => 10, bytes => '100M', percent => '3%', warning => sub { .. })
2125              
2126             THESE TESTS ARE TO BE IMPLEMENTED.
2127              
2128             Guard the filesystem (destdir) against becoming full during C.
2129             Before unpacking each source archive, the free space is measured and compared against three conditions:
2130              
2131             =over
2132              
2133             =item *
2134              
2135             The archive size multiplied with the given factor must fit into the filesystem.
2136              
2137             =item *
2138              
2139             The given number of bytes (in optional K, M, G, or T units) must be free.
2140              
2141             =item *
2142              
2143             The filesystem must have at least the given free percentage. The '%' character is optional.
2144            
2145             =back
2146              
2147             The warning method is called if any of the above conditions fail. Its signature is:
2148             &warning->($pathname, $full_percentage, $free_bytes, $free_inodes);
2149             It is expected to print an appropriate warning message, and delay a few seconds.
2150             It should return 0 to cause a retry. It should return nonzero to continue unpacking.
2151             The default warning method prints a message to STDERR, waits 30 seconds, and returns 0.
2152              
2153             The filesystem may still become full and unpacking may fail, if e.g. factor was chosen lower than
2154             the average compression ratio of the archives.
2155              
2156             =cut
2157              
2158             sub _bytes_unit
2159             {
2160 14     14   23 my ($text) = @_;
2161 14 50       85 return int($1*1024) if $text =~ m{([\d\.]+)k}i;
2162 14 100       80 return int($1*1024*1024) if $text =~ m{([\d\.]+)m}i;
2163 7 50       91 return int($1*1024*1024*1024) if $text =~ m{([\d\.]+)g}i;
2164 0 0       0 return int($1*1024*1024*1024*1024) if $text =~ m{([\d\.]+)t}i;
2165 0         0 return int($text);
2166             }
2167              
2168             sub _unit_bytes
2169             {
2170 0     0   0 my ($number, $dec_places) = @_;
2171 0 0       0 $dec_places = 2 unless defined $dec_places;
2172 0         0 my $div = 1;
2173 0         0 my $unit = '';
2174 0         0 my $neg = '';
2175 0 0       0 if ($number < 0)
2176             {
2177 0         0 $neg = '-'; $number = -$number;
  0         0  
2178             }
2179 0 0       0 if ($number > $div * 1024)
2180             {
2181 0         0 $div *= 1024; $unit = 'k';
  0         0  
2182 0 0       0 if ($number > $div * 1024)
2183             {
2184 0         0 $div *= 1024; $unit = 'm';
  0         0  
2185 0 0       0 if ($number > $div * 1024)
2186             {
2187 0         0 $div *= 1024; $unit = 'g';
  0         0  
2188 0 0       0 if ($number > $div * 1024)
2189             {
2190 0         0 $div *= 1024; $unit = 't';
  0         0  
2191             }
2192             }
2193             }
2194             }
2195 0         0 return sprintf "%s%.*f%s", $neg, $dec_places, ($number / $div), $unit;
2196             }
2197              
2198             # see fs.pm/check_fs_health()
2199              
2200             sub minfree
2201             {
2202 7     7 1 15 my $self = shift;
2203 7         19 my %opt = @_;
2204              
2205 7         18 for my $i (qw(factor bytes percent))
2206             {
2207 21 50       44 $self->{minfree}{$i} = $opt{$i} if defined $opt{$i};
2208 21   50     57 $self->{minfree}{$i} ||= 0;
2209             }
2210 7         28 $self->{minfree}{bytes} = _bytes_unit($self->{minfree}{bytes});
2211 7         33 $self->{minfree}{percent} =~ s{%$}{};
2212 7 50       48 $self->{fs_warn} = $opt{warning} if ref $opt{warning};
2213             }
2214              
2215             =head2 mime
2216              
2217             $u->mime($filename)
2218              
2219             $u->mime(file => $filename)
2220              
2221             $u->mime(buf => "#!/bin ...", file => "what-was-read")
2222              
2223             $u->mime(fd => \*STDIN, file => "what-was-opened")
2224              
2225             Determines the MIME type (and optionally additional information) of a file.
2226             The file can be specified by filename, by a provided buffer or an opened file descriptor.
2227             For the latter two cases, specifying a filename is optional, and used only for diagnostics.
2228              
2229             C uses libmagic by Christos Zoulas exposed via File::LibMagic and also uses
2230             the shared-mime-info database from freedesktop.org exposed via
2231             File::MimeInfo::Magic, if available. Either one is sufficient, but having both
2232             is better. LibMagic sometimes says 'text/x-pascal', although we have a F<.desktop>
2233             file, or says 'text/plain', but has contradicting details in its description.
2234              
2235             C is consulted where the libmagic output is dubious. E.g. when
2236             the desciption says something interesting like 'Debian binary package (format 2.0)' but the
2237             mimetype says 'application/octet-stream'. The combination of both libraries gives us
2238             excellent reliability in the critical field of MIME type recognition.
2239              
2240             This implementation also features multi-level MIME type recognition for efficient unpacking.
2241             When e.g. unpacking a large bzipped tar archive, this saves us from creating a
2242             huge temporary tar-file which C would extract in a second step. The multi-level recognition
2243             returns 'application/x-tar+bzip2' in this case, and allows for a MIME helper
2244             to e.g. pipe the bzip2 contents into tar (which is exactly what 'tar jxvf'
2245             does, making a very simple and efficient MIME helper).
2246              
2247             C returns a 3 or 4 element arrayref with mimetype, charset, description, diff;
2248             where diff is only present when the libfile and shared-mime-info methods disagree.
2249              
2250             In case of 'text/plain', an additional rule based on file name suffix is used to allow
2251             recognition of well known plain text pack formats.
2252             We return 'text/x-suffix-XX+plain', where XX is one of the recognized suffixes
2253             (in all lower case and without the dot). E.g. a plain mmencoded file has no
2254             header and looks like 'plain/text' to all the known magic libraries. We
2255             recognize the suffixes .mm, .b64, and .base64 for this (case insignificant).
2256             A similar rule exitst for 'application/octect-stream'. It may trigger e.g. for
2257             LZMA compressed files which fail to provide a magic number.
2258              
2259             Examples:
2260            
2261             [ 'text/x-perl', 'us-ascii', 'a /usr/bin/perl -w script text']
2262              
2263             [ 'text/x-mpegurl', 'utf-8', 'M3U playlist text',
2264             [ 'text/plain', 'application/x-mpegurl']]
2265              
2266             [ 'application/x-tar+bzip2, 'binary',
2267             "bzip2 compressed data, block size = 900k\nPOSIX tar archive (GNU)", ...]
2268              
2269             =cut
2270              
2271             sub mime
2272             {
2273 4     4 1 820 my ($self, @in) = @_;
2274              
2275 4         38 my %in;
2276 4 50 66     27 %in = %{$in[0]} if !$#in and ref $in[0] eq 'HASH';
  0         0  
2277 4 50 66     27 unshift @in, 'file' if !$#in and !ref $in[0];
2278 4 50       23 %in = @in if $#in > 0;
2279              
2280 4   33     417 my $flm = $self->{flm} ||= File::LibMagic->new();
2281              
2282 0 0       0 unless (defined $in{buf})
2283             {
2284 0         0 my $fd = $in{fd};
2285 0 0       0 unless ($fd)
2286             {
2287 0 0       0 open $fd, "<", $in{file} or
2288             return [ 'x-system/x-error', undef, "cannot open '$in{file}': $!" ];
2289             }
2290              
2291 0   0     0 my $f = $in{file}||'-';
2292 0         0 $in{buf} = '';
2293 0         0 my $pos = tell $fd;
2294             ##bzip2 below needs a long buffer, or it returns 0.
2295 0         0 my $len = read $fd, $in{buf}, $UNCOMP_BUFSZ;
2296 0 0       0 return [ 'x-system/x-error', undef, "read '$f' failed: $!" ] unless defined $len;
2297 0 0       0 return [ 'x-system/x-error', undef, "read '$f' failed: $len: $!" ] if $len < 0;
2298 0 0       0 return [ 'text/x-empty', undef, 'empty' ] if $len == 0;
2299 0         0 seek $fd, $pos, 0;
2300              
2301 0 0       0 close $fd unless $in{fd};
2302             }
2303              
2304              
2305             ## flm can say 'cannot open \'IP\' (No such file or directory)'
2306             ## flm can say 'CDF V2 Document, corrupt: Can\'t read SAT' (application/vnd.ms-excel)
2307 0         0 my $mime1 = $flm->checktype_contents($in{buf});
2308 0 0 0     0 if ($mime1 =~ m{, corrupt: } or $mime1 =~ m{^application/octet-stream\b})
2309             {
2310             # application/x-iso9660-image is reported as application/octet-stream if the buffer is short.
2311             # iso images usually start with 0x8000 bytes of all '\0'.
2312 0 0       0 print STDERR "mime: readahead buffer $UNCOMP_BUFSZ too short\n" if $self->{verbose} > 2;
2313 0 0 0     0 if (defined $in{file} and -f $in{file})
2314             {
2315 0 0       0 print STDERR "mime: reopening $in{file}\n" if $self->{verbose} > 1;
2316 0         0 $mime1 = $flm->checktype_filename($in{file});
2317             }
2318             }
2319 0 0       0 print STDERR "flm->checktype_contents: $mime1\n" if $self->{verbose} > 1;
2320 0 0       0 $in{file} = '-' unless defined $in{file};
2321            
2322 0 0       0 return [ 'x-system/x-error', undef, $mime1 ] if $mime1 =~ m{^cannot open};
2323              
2324             # in SLES11 we get 'text/plain charset=utf-8' without semicolon.
2325 0 0 0     0 my $enc; ($mime1, $enc) = ($1,$2) if $mime1 =~ m{^(.*?);\s*(.*)$} or
  0         0  
2326             $mime1 =~ m{^(.*?)\s+(.*)$};
2327 0 0       0 $enc =~ s{^charset=}{} if defined $enc;
2328 0         0 my @r = ($mime1, $enc, $flm->describe_contents($in{buf}) );
2329 0         0 my $mime2;
2330              
2331            
2332 0 0       0 if ($mime1 =~ m{^application/xml})
2333             {
2334             # This is horrible from a greedy text cruncher perspective:
2335             # although xml is a plain text syntax, it is reported by flm to be
2336             # outside text/*
2337 0         0 $r[0] = "text/x-application-xml";
2338             }
2339              
2340 0 0 0     0 if ($mime1 =~ m{^text/x-(?:pascal|fortran)$})
    0 0        
2341             {
2342             # xterm.desktop
2343             # ['text/x-pascal; charset=utf-8','UTF-8 Unicode Pascal program text']
2344             # 'application/x-desktop'
2345             #
2346             # Times-Roman.afm
2347             # ['text/x-fortran; charset=us-ascii','ASCII font metrics']
2348             # 'application/x-font-afm'
2349             #
2350             # debian/rules
2351             # ['text/x-pascal; charset=us-ascii','a /usr/bin/make -f script text']
2352             # 'text/x-makefile'
2353 0 0 0     0 if ($mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); })
  0         0  
  0         0  
2354             {
2355 0 0       0 $r[0] = "text/$1" if $mime2 =~ m{/(\S+)};
2356             }
2357             }
2358             elsif (($mime1 eq 'text/plain' and $r[2] =~ m{(?:PostScript|font)}i)
2359             or ($mime1 eq 'application/postscript'))
2360             {
2361             # 11.3 says:
2362             # IPA.pfa
2363             # ['text/plain; charset=us-ascii','PostScript Type 1 font text (OmegaSerifIPA 001.000)']
2364             # sles11 says:
2365             # IPA.pfa
2366             # ['application/postscript', undef, 'PostScript document text']
2367             #
2368             # mime2 = 'application/x-font-type1'
2369             # $mime2 = eval { File::MimeInfo::Magic::mimetype($in{file}); };
2370 0   0     0 $mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); };
  0         0  
  0         0  
2371 0 0 0     0 if ($mime2 and $mime2 =~ m{^(.*)/(.*)$})
2372             {
2373 0         0 my ($a,$b) = ($1,$2);
2374 0 0       0 $a = 'text' if $r[2] =~ m{\btext\b}i;
2375 0         0 $r[0] = "$a/$b";
2376             }
2377             }
2378              
2379 0 0 0     0 if ($r[0] eq 'text/plain' or
2380             $r[0] eq 'application/octet-stream')
2381             {
2382             # hmm, are we sure? No, if the description contradicts:
2383             #
2384 0 0       0 $r[0] = "text/x-uuencode" if $r[2] eq 'uuencoded or xxencoded text';
2385              
2386             # bin/floor
2387             # ['text/x-pascal; charset=us-ascii','a /usr/bin/tclsh script text']
2388             # 'text/plain'
2389 0 0       0 $r[0] = "text/x-$2" if $r[2] =~ m{^a (\S*/)?([^/\s]+) .*script text$}i;
2390 0 0       0 if ($r[2] =~ m{\bimage\b})
2391             {
2392             # ./opengl/test.tga
2393             # ['application/octet-stream; charset=binary','Targa image data - RGB 128 x 128']
2394             # 'image/x-tga'
2395 0   0     0 $mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); };
  0         0  
  0         0  
2396 0 0 0     0 $r[0] = $mime2 if $mime2 and $mime2 =~ m{^image/};
2397             }
2398             }
2399              
2400 0 0       0 if ($r[0] eq 'application/octet-stream')
2401             {
2402             # it can't get much worse, can it?
2403             ##
2404             # dotdot.tar.lzma
2405             # {'File::MimeInfo::Magic' => 'application/x-lzma-compressed-tar'} -- no, that was suffix based!
2406             # {'File::LibMagic' => ['application/octet-stream; charset=binary','data']}
2407 0   0     0 $mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); };
  0         0  
  0         0  
2408             #
2409             # File::LibMagic misreads monotone-0.99.1/monotone.info-1 as app/bin
2410             # File::MimeInfo::Magic::magic() returns undef for that one.
2411             # But perl itself does not agree:
2412 0 0 0     0 $mime2 ||= 'application/x-text-mixed' if -T $in{file};
2413              
2414 0 0       0 $r[0] = $mime2 if $mime2;
2415             }
2416              
2417 0 0       0 if ($r[0] eq 'application/octet-stream')
2418             {
2419 0 0       0 if ($r[2] =~ m{\bcpio\s+archive\b}i)
2420             {
2421             # OSX pax files are gzipped cpio: 'ASCII cpio archive (pre-SVR4 or odc)'
2422 0         0 $r[0] = 'application/x-cpio';
2423             }
2424             }
2425              
2426 0 0       0 if ($r[0] eq 'application/octet-stream')
2427             {
2428 0 0       0 if ($r[2] =~ m{\bxar\s+archive\b}i)
2429             {
2430             # OSX pkg files are xar containers with gzipped cpio's inside.'
2431 0         0 $r[0] = 'application/x-xar';
2432             }
2433             }
2434              
2435 0         0 my $uncomp_buf = '';
2436              
2437 0 0       0 if ($r[0] eq 'application/octet-stream')
2438             {
2439             ## lzma is an extremly bad format. It has no magic.
2440             #
2441             # WARNING from Compress::unLZMA
2442             # "This version only implements in-memory decompression (patches are welcomed).
2443             # There is no way to recognize a valid LZMA encoded file with the SDK.
2444             # So, in some cases, you can crash your script if you try to uncompress a
2445             # non valid LZMA encoded file."
2446             # Does this also apply to us?
2447             #
2448             # -- hmm, maybe we better leave it at calling lzcat.
2449             # Trade in "always a bit expensive" versus "sometimes crashing"...
2450             #
2451             # my $lztest = `sh -c "/usr/bin/lzcat < $in{file} | head -c 1k > /dev/null" 2>&1`;
2452             # # -> /usr/bin/lzcat: (stdin): File format not recognized
2453             # if ($lztest !~ m{(not recognized|error)}i)
2454             # {
2455             # $r[0] = 'application/x-lzma';
2456             # }
2457              
2458 0 0       0 if (10 < length $in{buf})
2459             {
2460 9     9   104 no strict 'subs'; # Compress::Raw::Lzma::AloneDecoder, LZMA_OK, LZMA_STREAM_END
  9         16  
  9         10675  
2461              
2462 0         0 my $saved_input = $in{buf};
2463 0         0 my ($lz, $stat) = eval { Compress::Raw::Lzma::AloneDecoder->new(-Bufsize => $UNCOMP_BUFSZ, -LimitOutput => 1); };
  0         0  
2464 0 0       0 if ($lz)
2465             {
2466 0         0 $stat = $lz->code($in{buf}, $uncomp_buf);
2467 0 0 0     0 if (($stat == LZMA_OK or $stat == LZMA_STREAM_END)
      0        
2468             and
2469             (length($uncomp_buf) > length($saved_input)))
2470             {
2471 0         0 $r[0] = "application/x-lzma";
2472 0         0 $r[2] = "LZMA compressed data, no magic";
2473             }
2474             # This decompressor consumes the input.
2475 0         0 $in{buf} = $saved_input;
2476             }
2477             }
2478             }
2479             # printf STDERR "in-buf = %d bytes\n", length($in{buf});
2480              
2481 0 0       0 if ($r[0] =~ m{^application/(?:x-)?gzip$})
2482             {
2483 0         0 my ($gz, $stat) = eval { new Compress::Raw::Zlib::Inflate( -WindowBits => WANT_GZIP() ); };
  0         0  
2484 0 0       0 if ($gz)
2485             {
2486 0         0 my $stat = $gz->inflate($in{buf}, $uncomp_buf);
2487             # printf STDERR "stat=%s, uncomp=%d bytes \n", $stat, length($uncomp_buf);
2488             }
2489             }
2490              
2491             ## bzip2 is not nice for stacked mime checking.
2492             ## It needs a huge input buffer that we do not normally provide.
2493             ## We only support it at the top of a stack, where we acquire enough additional
2494             ## input until bzip2 is happy.
2495 0 0 0     0 if ($r[0] =~ m{^application/(?:x-)?bzip2$} && !$in{recursion})
2496             {
2497 0         0 my $limitOutput = 1;
2498 0         0 my ($bz, $stat) = eval { new Compress::Raw::Bunzip2 0, 0, 0, 0, $limitOutput; };
  0         0  
2499 0 0       0 if ($bz)
2500             {
2501             ## this only works if this is a first level call.
2502 0 0       0 open my $IN, "<", $in{file} unless $in{file} eq '-';
2503 0         0 seek $IN, length($in{buf}), 0;
2504 0         0 while (!length $uncomp_buf)
2505             {
2506 0         0 my $stat = $bz->bzinflate($in{buf}, $uncomp_buf);
2507             # $bz->bzflush($uncomp_buf); # wishful thinking....
2508 0 0       0 last if length($in{buf}); # did not consume, strange.
2509 0 0       0 last if length $stat; # something wrong, or file ends.
2510 0 0       0 last unless read $IN, $in{buf}, 10*1024, length($in{buf}); # try to get more data
2511             }
2512 0         0 my $slurped = tell $IN; # likely to get ca. 800k yacc!
2513 0         0 close $IN;
2514             # use Data::Dumper; warn Dumper $stat, length($in{buf}), length($uncomp_buf), "slurped=$slurped";
2515             }
2516             }
2517              
2518             ## try to get at the second level mime type, for some well known linear compressors.
2519 0   0     0 while (length $uncomp_buf && $r[0] =~ m{^application/(x-)?([+\w]+)$})
2520             {
2521 0         0 my $compname = $2;
2522 0         0 my $next_uncomp_buf = '';
2523              
2524             # use Data::Dumper; printf STDERR "calling mime with buf=%d bytes, compname=$compname\n", length($uncomp_buf);
2525              
2526             #########
2527             ## FIXME: adding +$compname to the filename prevents reopening in mime, if needed.
2528             ## Why did I do this in the first place?
2529             # my $m2 = $self->mime(buf => $uncomp_buf, file => "$in{file}+$compname", uncomp => \$next_uncomp_buf, recursion => 1);
2530             #########
2531              
2532 0         0 my $m2 = $self->mime(buf => $uncomp_buf, file => $in{file}, uncomp => \$next_uncomp_buf, recursion => 1);
2533 0         0 my ($a,$xminus,$b) = ($m2->[0] =~ m{^(.*)/(x-)?(.*)$});
2534 0 0       0 if ($a eq 'application')
2535             {
2536 0         0 $r[0] = "application/x-$b+$compname"
2537             }
2538             else
2539             {
2540 0         0 $r[0] = "application/x-$a-$b+$compname"
2541             }
2542 0         0 $r[2] .= "\n" . $m2->[2];
2543 0         0 $uncomp_buf = $next_uncomp_buf;
2544             # print Dumper "new: ", \@r, $m2, $compname, length($uncomp_buf);
2545             }
2546              
2547             # use Data::Dumper;
2548             # die Dumper \@r, "--------------------";
2549              
2550 0 0 0     0 if ($r[0] eq 'application/unknown+zip' and $r[2] =~ m{\btext\b}i)
2551             {
2552             # empty.odt
2553             # ['application/unknown+zip; charset=binary','Zip archive data, at least v2.0 to extract, mime type application/vnd OpenDocument Text']
2554             # application/vnd.oasis.opendocument.text
2555 0 0 0     0 if ($mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); })
  0         0  
  0         0  
2556             {
2557 0 0       0 $mime2 .= '+zip' unless $mime2 =~ m{\+zip}i;
2558 0 0       0 $r[0] = $mime2 if $mime2 =~ m{^application/};
2559             }
2560             }
2561 0 0       0 $r[0] .= '+zip' if $r[0] =~ m{^application/vnd\.oasis\.opendocument\.text$};
2562              
2563 0 0 0     0 if ($r[0] eq 'text/plain' and $in{file} =~ m{\.(mm|b64|base64)$}i)
2564             {
2565 0         0 my $suf = lc $1;
2566 0         0 $r[0] = "text/x-suffix-$suf+plain";
2567             }
2568              
2569 0 0 0     0 if ($r[0] eq 'application/octet-stream' and $in{file} =~ m{\.(lzma|zx|lz)$}i)
2570             {
2571 0         0 my $suf = lc $1;
2572 0         0 $r[0] = "application/x-suffix-$suf+octet-stream";
2573             }
2574              
2575 0 0       0 if ($r[0] =~ m{^application/x-(ms-dos-|)executable$})
2576             {
2577 0 0       0 if (-x '/usr/bin/upx')
2578             {
2579             # upx refuses to read symlinks. Work around this.
2580 0         0 my $in_file = $in{file};
2581 0 0       0 $in_file = readlink($in{file}) if -l $in{file};
2582 0 0       0 $r[0] .= '+upx' unless run(['/usr/bin/upx', '-q', '-q', '-t', $in_file]);
2583             }
2584             }
2585              
2586 0 0       0 ${$in{uncomp}} = $uncomp_buf if ref $in{uncomp} eq 'SCALAR';
  0         0  
2587 0 0 0     0 $r[3] = [ $mime1, $mime2 ] if $mime1 ne $r[0] or ($mime2 and $mime2 ne $mime1);
      0        
2588              
2589 0         0 return \@r;
2590             }
2591              
2592             =head1 AUTHOR
2593              
2594             Juergen Weigert, C<< >>
2595              
2596             =head1 BUGS
2597              
2598             The implementation of C is an ugly hack. We suffer from the existence of
2599             multiple file magic databases, and multiple conflicting implementations. With
2600             Perl we have at least 5 modules for this; here we use two.
2601              
2602             The builtin list of MIME helpers is incomplete. Please submit your handler code.
2603              
2604             Please report any bugs or feature requests to C, or through
2605             the web interface at L. I will be notified, and then you'll
2606             automatically be notified of progress on your bug as I make changes.
2607              
2608              
2609             =head1 RELATED MODULES
2610              
2611             While designing File::Unpack, a range of other perl modules were examined. Many modules provide valuable service to File::Unpack and became dependencies or are recommended.
2612             Others exposed drawbacks during closer examination and may find some of their
2613             wheels re-invented here.
2614              
2615             =head2 Used Modules
2616              
2617             =over
2618              
2619             =item File::LibMagic
2620              
2621             This is the prefered mimetype engine. It disregards the suffix, recognizes more
2622             types than any of the alternatives, and uses exactly the same engine as
2623             /usr/bin/file in openSUSE systems. It also returns charset and description
2624             information. We crossreference the description with the mimetype to detect
2625             weaknesses, and consult File::MimeInfo::Magic and some own logic, for e.g.
2626             detecting LZMA compression which fails to provide any recognizable magic.
2627             Required if you use C; otherwise not a hard requirement.
2628              
2629             =item File::MimeInfo::Magic
2630              
2631             Uses both magic information and file suffixes to determine the mimetype. Its
2632             magic() function is used in a few cases, where File::LibMagic fails. E.g. as
2633             of June 2010, libmagic does not recognize 'image/x-targa'.
2634             File::MimeInfo::Magic may be slower, but it features the shared-mime-info
2635             database from freedesktop.org . Recommended if you use C.
2636              
2637             =item String::ShellQuote
2638              
2639             Used to call external MIME helpers. Required.
2640              
2641             =item BSD::Resource
2642              
2643             Used to reliably restrict the maximum file size. Recommended.
2644              
2645             =item File::Path
2646              
2647             mkpath(). Required.
2648              
2649             =item Cwd
2650              
2651             fast_abs_path(). Required.
2652              
2653             =item JSON
2654              
2655             Used for formatting the logfile. Required.
2656              
2657             =back
2658              
2659             =head2 Modules Not Used
2660              
2661             =over
2662              
2663             =item Archive::Extract
2664              
2665             Archive::Extract tries first to determine what type of archive you are passing
2666             it, by inspecting its suffix. 'Maybe this module should use something like
2667             "File::Type" to determine the type, rather than blindly trust the suffix'.
2668             [quoted from perldoc]
2669              
2670             Set $Archive::Extract::PREFER_BIN to 1, which will prefer the use of command
2671             line programs and won't consume so much memory. Default: use "Archive::Tar".
2672              
2673             =item Archive::Zip
2674              
2675             If you are just going to be extracting zips (and/or other archives) you are
2676             recommended to look at using Archive::Extract . [quoted from perldoc]
2677             It is pure perl, so it's a lot slower then your '/usr/bin/zip'.
2678              
2679             =item Archive::Tar
2680              
2681             It is pure Perl, so it's a lot slower then your "/bin/tar".
2682             It is heavy on memory, all will be read into memory. [quoted from perldoc]
2683              
2684             =item File::MMagic, File::MMagic::XS, File::Type
2685              
2686             Compared to File::LibMagic and File::MimeInfo::Magic, these three are inferior.
2687             They often say 'text/plain' or 'application/octet-stream' where the latter two report
2688             useful mimetypes.
2689              
2690             =back
2691              
2692             =head1 SUPPORT
2693              
2694             You can find documentation for this module with the perldoc command.
2695              
2696             perldoc File::Unpack
2697              
2698              
2699             You can also look for information at:
2700              
2701             =over 4
2702              
2703             =item * RT: CPAN's request tracker
2704              
2705             L
2706              
2707             =item * AnnoCPAN: Annotated CPAN documentation
2708              
2709             L
2710              
2711             =item * CPAN Ratings
2712              
2713             L
2714              
2715             =item * Search CPAN
2716              
2717             L
2718              
2719             =back
2720              
2721             =head1 SOURCE REPOSITORY
2722              
2723             L
2724              
2725             L
2726              
2727             git clone L
2728              
2729              
2730             =head1 ACKNOWLEDGEMENTS
2731              
2732             MIME type recognition relies heavily on libmagic by Christos Zoulas. I had long
2733             hesitated implementing File::Unpack, but set to work, when I dicovered
2734             that File::LibMagic brings your library to perl. Thanks Christos. And thanks
2735             for tcsh too.
2736              
2737             =head1 LICENSE AND COPYRIGHT
2738              
2739             Copyright 2010,2011,2012,2013 Juergen Weigert.
2740              
2741             This program is free software; you can redistribute it and/or modify it
2742             under the terms of either: the GNU General Public License as published
2743             by the Free Software Foundation; or the Artistic License.
2744              
2745             See http://dev.perl.org/licenses/ for more information.
2746              
2747              
2748             =cut
2749              
2750             1; # End of File::Unpack