File Coverage

blib/lib/File/MimeInfo.pm
Criterion Covered Total %
statement 190 203 93.6
branch 114 176 64.7
condition 13 24 54.1
subroutine 24 24 100.0
pod 13 13 100.0
total 354 440 80.4


line stmt bran cond sub pod time code
1             package File::MimeInfo;
2              
3 8     8   111075 use strict;
  8         33  
  8         235  
4 8     8   40 use warnings;
  8         13  
  8         197  
5 8     8   36 use Carp;
  8         14  
  8         464  
6 8     8   71 use Fcntl 'SEEK_SET';
  8         17  
  8         344  
7 8     8   45 use File::Spec;
  8         12  
  8         233  
8 8     8   2202 use File::BaseDir qw/data_files/;
  8         6012  
  8         7901  
9             require Exporter;
10              
11             our @ISA = qw(Exporter);
12             our @EXPORT = qw(mimetype);
13             our @EXPORT_OK = qw(extensions describe globs inodetype mimetype_canon mimetype_isa);
14             our $VERSION = '0.31';
15             our $DEBUG;
16              
17             our ($_hashed, $_hashed_aliases, $_hashed_subclasses, $_has_mimeinfo_database);
18             our (@globs, %literal, %extension, %mime2ext, %aliases, %subclasses);
19             our ($LANG, @DIRS);
20             # @globs = [ [ 'glob', qr//, $mime_string ], ... ]
21             # %literal contains literal matches
22             # %extension contains extensions (globs matching /^\*(\.\w)+$/ )
23             # %mime2ext is used for looking up extension by mime type
24             # %aliases contains the aliases table
25             # %subclasses contains the subclasses table
26             # $LANG can be used to set a default language for the comments
27             # @DIRS can be used to specify custom database directories
28              
29 1     1 1 440 sub new { bless \$VERSION, shift } # what else is there to bless ?
30              
31             sub mimetype {
32 21     21 1 5645 my $file = pop;
33 21 50       48 croak 'subroutine "mimetype" needs a filename as argument' unless defined $file;
34             return
35 21   100     32 inodetype($file) ||
36             globs($file) ||
37             default($file);
38             }
39              
40             sub inodetype {
41 29     29 1 38 my $file = pop;
42 29 50       56 print STDERR "> Checking inode type\n" if $DEBUG;
43 29 100       524 lstat $file or return undef;
44 16 100       90 return undef if -f _;
45 2 0       23 my $t = (-l $file) ? 'inode/symlink' : # Win32 does not like '_' here
    0          
    0          
    0          
    50          
    100          
46             (-d _) ? 'inode/directory' :
47             (-p _) ? 'inode/fifo' :
48             (-c _) ? 'inode/chardevice' :
49             (-b _) ? 'inode/blockdevice' :
50             (-S _) ? 'inode/socket' : '' ;
51 2 100       8 if ($t eq 'inode/directory') { # compare devices to detect mount-points
52 1         4 my $dev = (stat _)[0]; # device of the node under investigation
53 1         39 $file = File::Spec->rel2abs($file); # get full path
54 1         14 my @dirs = File::Spec->splitdir($file);
55 1         9 $file = File::Spec->catfile(@dirs); # removes trailing '/' or equivalent
56 1 50       17 return $t if -l $file; # parent can be on other dev for links
57 1         2 pop @dirs;
58 1         7 my $dir = File::Spec->catdir(@dirs); # parent dir
59 1 50       13 $t = 'inode/mount-point' unless (stat $dir)[0] == $dev; # compare devices
60 1         8 return $t;
61             }
62 1 50       8 else { return $t ? $t : undef }
63             }
64              
65             sub globs {
66 25     25 1 52 my $file = pop;
67 25 50       88 croak 'subroutine "globs" needs a filename as argument' unless defined $file;
68 25 100       48 rehash() unless $_hashed;
69 25         328 (undef, undef, $file) = File::Spec->splitpath($file); # remove path
70 25 50       82 print STDERR "> Checking globs for basename '$file'\n" if $DEBUG;
71              
72 25 100       68 return $literal{$file} if exists $literal{$file};
73              
74 24 100       74 if ($file =~ /\.(\w+(\.\w+)*)$/) {
75 11         32 my @ext = split /\./, $1;
76 11         22 while (@ext) {
77 16         30 my $ext = join('.', @ext);
78 16 50       27 print STDERR "> Checking for extension '.$ext'\n" if $DEBUG;
79 16 50       21 carp "WARNING: wantarray behaviour of globs() will change in the future.\n" if wantarray;
80             return wantarray
81             ? ($extension{$ext}, $ext)
82             : $extension{$ext}
83 16 50       98 if exists $extension{$ext};
    100          
84 6         11 shift @ext;
85             }
86             }
87              
88 14         28 for (@globs) {
89 14 100       107 next unless $file =~ $_->[1];
90 2 50       6 print STDERR "> This file name matches \"$_->[0]\"\n" if $DEBUG;
91 2         13 return $_->[2];
92             }
93              
94 12 100       44 return globs(lc $file) if $file =~ /[A-Z]/; # recurs
95 11         42 return undef;
96             }
97              
98             sub default {
99 8     8 1 11 my $file = pop;
100 8 50       19 croak 'subroutine "default" needs a filename as argument' unless defined $file;
101              
102 8         11 my $line;
103 8 0       18 unless (ref $file) {
    50          
104 8 100       106 return undef unless -f $file;
105 7 50       20 print STDERR "> File exists, trying default method\n" if $DEBUG;
106 7 100       73 return 'text/plain' if -z $file;
107              
108 6 50       178 open FILE, '<', $file or return undef;
109 6 50       356 binmode FILE, ':utf8' unless $] < 5.008;
110 6         139 read FILE, $line, 32;
111 6         81 close FILE;
112             }
113 0         0 elsif (ref $file eq 'Path::Tiny') {
114 0 0       0 return undef unless $file->exists;
115 0 0       0 print STDERR "> File is Path::Tiny object and exists, "
116             . "trying default method\n" if $DEBUG;
117 0 0       0 open my $fh, '<', $file or return undef;
118 0 0       0 binmode FILE, ':utf8' unless $] < 5.008;
119 0         0 read $fh, $line, 32;
120 0         0 close $fh;
121             }
122             else {
123 0 0       0 print STDERR "> Trying default method on object\n" if $DEBUG;
124              
125 0         0 $file->seek(0, SEEK_SET);
126 0         0 $file->read($line, 32);
127             }
128              
129             {
130 8     8   69 no warnings; # warnings can be thrown when input not ascii
  8         18  
  8         522  
  6         13  
131 6 100 66     43 if ($] < 5.008 or ! utf8::valid($line)) {
132 8     8   4560 use bytes; # avoid invalid utf8 chars
  8         111  
  8         46  
133 2         18 $line =~ s/\s//g; # \m, \n and \t are also control chars
134 2 50       12 return 'text/plain' unless $line =~ /[\x00-\x1F\x7F]/;
135             }
136             else {
137             # use perl to do something intelligent for ascii & utf8
138 4 100       50 return 'text/plain' unless $line =~ /[^[:print:]\s]/;
139             }
140             }
141 3 50       9 print STDERR "> First 10 bytes of the file contain control chars\n" if $DEBUG;
142 3         22 return 'application/octet-stream';
143             }
144              
145             sub rehash {
146 4     4 1 534 (@globs, %literal, %extension, %mime2ext) = (); # clear all data
147 4         8 local $_; # limit scope of $_ ... :S
148             my @globfiles = @DIRS
149 4 50       32 ? ( grep {-e $_ && -r $_} map "$_/globs", @DIRS )
  1 100       34  
150             : ( reverse data_files('mime/globs') );
151 4 100       443 if (@globfiles) {
152 3         16 $_has_mimeinfo_database = 1;
153             } else {
154 1         219 carp "WARNING: You don't seem to have a mime-info database. " .
155             "The shared-mime-info package is available from http://freedesktop.org/";
156             }
157 4         12 my @done;
158 4         11 for my $file (@globfiles) {
159 5 100       13 next if grep {$file eq $_} @done;
  2         9  
160 3         18 _hash_globs($file);
161 3         11 push @done, $file;
162             }
163 4         11 $_hashed = 1;
164             }
165              
166             sub _hash_globs {
167 3     3   6 my $file = shift;
168 3   33     91 open GLOB, '<', $file || croak "Could not open file '$file' for reading" ;
169 3 50       28 binmode GLOB, ':utf8' unless $] < 5.008;
170 3         8 my ($string, $glob);
171 3         96 while () {
172 33 100 66     178 next if /^\s*#/ or ! /\S/; # skip comments and empty lines
173 30         64 chomp;
174 30         94 ($string, $glob) = split /:/, $_, 2;
175 30 100       125 unless ($glob =~ /[\?\*\[]/) { $literal{$glob} = $string }
  3 100       15  
176 0         0 elsif ($glob =~ /^\*\.(\w+(\.\w+)*)$/) {
177 24 100       101 $extension{$1} = $string unless exists $extension{$1};
178 24 100       75 $mime2ext{$string} = [] if !defined($mime2ext{$string});
179 24         30 push @{$mime2ext{$string}}, $1;
  24         147  
180 3         10 } else { unshift @globs, [$glob, _glob_to_regexp($glob), $string] }
181             }
182 3 50       44 close GLOB || croak "Could not open file '$file' for reading" ;
183             }
184              
185             sub _glob_to_regexp {
186 7     7   7865 my $glob = shift;
187 7         24 $glob =~ s/\./\\./g;
188 7         33 $glob =~ s/([?*])/.$1/g;
189 7         18 $glob =~ s/([^\w\/\\\.\?\*\[\]])/\\$1/g;
190 7         154 qr/^$glob$/;
191             }
192              
193             sub has_mimeinfo_database {
194 1 50   1 1 226 rehash() if (!$_hashed);
195 1         3 return $_has_mimeinfo_database;
196             }
197              
198             sub extensions {
199 2     2 1 557 my $mimet = mimetype_canon(pop @_);
200 2 100       7 rehash() unless $_hashed;
201 2 50       7 my $ref = $mime2ext{$mimet} if exists $mime2ext{$mimet};
202 2 50       5 return $ref ? @{$ref} : undef if wantarray;
  1 100       8  
203 1 50       4 return $ref ? @{$ref}[0] : '';
  1         9  
204             }
205              
206             sub describe {
207 2 50   2 1 457 shift if ref $_[0];
208 2         6 my ($mt, $lang) = @_;
209 2 50       5 croak 'subroutine "describe" needs a mimetype as argument' unless $mt;
210 2         11 $mt = mimetype_canon($mt);
211 2 50       8 $lang = $LANG unless defined $lang;
212 2 100       6 my $att = $lang ? qq{xml:lang="$lang"} : '';
213 2         3 my $desc;
214             my @descfiles = @DIRS
215 2 0       14 ? ( grep {-e $_ && -r $_} map "$_/$mt.xml", @DIRS )
  0 50       0  
216             : ( reverse data_files('mime', split '/', "$mt.xml") ) ;
217 2         177 for my $file (@descfiles) {
218 2         3 $desc = ''; # if a file was found, return at least empty string
219 2   33     53 open XML, '<', $file || croak "Could not open file '$file' for reading";
220 2 50       19 binmode XML, ':utf8' unless $] < 5.008;
221 2         39 while () {
222 20 100       111 next unless m!(.*?)!;
223 2         5 $desc = $1;
224 2         5 last;
225             }
226 2 50       29 close XML || croak "Could not open file '$file' for reading";
227 2 50       8 last if $desc;
228             }
229 2         12 return $desc;
230             }
231              
232             sub mimetype_canon {
233 16     16 1 609 my $mimet = pop;
234 16 50       37 croak 'mimetype_canon needs argument' unless defined $mimet;
235 16 100       50 rehash_aliases() unless $_hashed_aliases;
236 16 100       67 return exists($aliases{$mimet}) ? $aliases{$mimet} : $mimet;
237             }
238              
239             sub rehash_aliases {
240 2     2 1 7 %aliases = _read_map_files('aliases');
241 2         21 $_hashed_aliases++;
242             }
243              
244             sub _read_map_files {
245 3     3   6 my ($name, $list) = @_;
246             my @files = @DIRS
247 3 0       18 ? ( grep {-e $_ && -r $_} map "$_/$name", @DIRS )
  0 50       0  
248             : ( reverse data_files("mime/$name") );
249 3         403 my (@done, %map);
250 3         8 for my $file (@files) {
251 6 100       15 next if grep {$_ eq $file} @done;
  3         14  
252 3   33     102 open MAP, '<', $file || croak "Could not open file '$file' for reading";
253 3 50       26 binmode MAP, ':utf8' unless $] < 5.008;
254 3         92 while (my $line = ) {
255 6 50       33 next unless $line =~ m/\S/; # skip empty lines
256 6 50       28 next if $line =~ m/^\s*#/; # skip comment lines
257 6         15 chomp $line;
258 6         38 my ($k, $v) = split m/\s+/, $line, 2;
259 6 100       15 if ($list) {
260 2 100       8 $map{$k} = [] unless $map{$k};
261 2         4 push @{$map{$k}}, $v;
  2         17  
262             }
263 4         34 else { $map{$k} = $v }
264             }
265 3         32 close MAP;
266 3         12 push @done, $file;
267             }
268 3         22 return %map;
269             }
270              
271             sub mimetype_isa {
272 5   33 5 1 15 my $parent = pop || croak 'mimetype_isa needs argument';
273 5         8 my $mimet = pop;
274 5 100 66     20 if (ref $mimet or ! defined $mimet) {
275 2         5 $mimet = mimetype_canon($parent);
276 2         4 undef $parent;
277             }
278             else {
279 3         7 $mimet = mimetype_canon($mimet);
280 3         6 $parent = mimetype_canon($parent);
281             }
282 5 100       12 rehash_subclasses() unless $_hashed_subclasses;
283              
284 5         7 my @subc;
285 5 100       13 push @subc, 'inode/directory' if $mimet eq 'inode/mount-point';
286 5 100       11 push @subc, @{$subclasses{$mimet}} if exists $subclasses{$mimet};
  2         4  
287 5 100       20 push @subc, 'text/plain' if $mimet =~ m#^text/#;
288 5 100       13 push @subc, 'application/octet-stream' unless $mimet =~ m#^inode/#;
289              
290 5 100       20 return $parent ? scalar(grep {$_ eq $parent} @subc) : @subc;
  6         21  
291             }
292              
293             sub rehash_subclasses {
294 1     1 1 3 %subclasses = _read_map_files('subclasses', 'LIST');
295 1         3 $_hashed_subclasses++;
296             }
297              
298             1;
299              
300             __END__