File Coverage

blib/lib/File/MimeInfo.pm
Criterion Covered Total %
statement 190 203 93.6
branch 114 176 64.7
condition 13 24 54.1
subroutine 24 24 100.0
pod 13 13 100.0
total 354 440 80.4


line stmt bran cond sub pod time code
1             package File::MimeInfo;
2              
3 8     8   117400 use strict;
  8         31  
  8         196  
4 8     8   37 use warnings;
  8         11  
  8         195  
5 8     8   33 use Carp;
  8         12  
  8         381  
6 8     8   58 use Fcntl 'SEEK_SET';
  8         12  
  8         291  
7 8     8   37 use File::Spec;
  8         13  
  8         187  
8 8     8   1911 use File::BaseDir qw/data_files/;
  8         5508  
  8         7206  
9             require Exporter;
10              
11             our @ISA = qw(Exporter);
12             our @EXPORT = qw(mimetype);
13             our @EXPORT_OK = qw(extensions describe globs inodetype mimetype_canon mimetype_isa);
14             our $VERSION = '0.33';
15             our $DEBUG;
16              
17             our ($_hashed, $_hashed_aliases, $_hashed_subclasses, $_has_mimeinfo_database);
18             our (@globs, %literal, %extension, %mime2ext, %aliases, %subclasses);
19             our ($LANG, @DIRS);
20             # @globs = [ [ 'glob', qr//, $mime_string ], ... ]
21             # %literal contains literal matches
22             # %extension contains extensions (globs matching /^\*(\.\w)+$/ )
23             # %mime2ext is used for looking up extension by mime type
24             # %aliases contains the aliases table
25             # %subclasses contains the subclasses table
26             # $LANG can be used to set a default language for the comments
27             # @DIRS can be used to specify custom database directories
28              
29 1     1 1 421 sub new { bless \$VERSION, shift } # what else is there to bless ?
30              
31             sub mimetype {
32 21     21 1 5571 my $file = pop;
33 21 50       45 croak 'subroutine "mimetype" needs a filename as argument' unless defined $file;
34             return
35 21   100     28 inodetype($file) ||
36             globs($file) ||
37             default($file);
38             }
39              
40             sub inodetype {
41 29     29 1 38 my $file = pop;
42 29 50       48 print STDERR "> Checking inode type\n" if $DEBUG;
43 29 100       426 lstat $file or return undef;
44 16 100       74 return undef if -f _;
45 2 0       19 my $t = (-l $file) ? 'inode/symlink' : # Win32 does not like '_' here
    0          
    0          
    0          
    50          
    100          
46             (-d _) ? 'inode/directory' :
47             (-p _) ? 'inode/fifo' :
48             (-c _) ? 'inode/chardevice' :
49             (-b _) ? 'inode/blockdevice' :
50             (-S _) ? 'inode/socket' : '' ;
51 2 100       6 if ($t eq 'inode/directory') { # compare devices to detect mount-points
52 1         2 my $dev = (stat _)[0]; # device of the node under investigation
53 1         41 $file = File::Spec->rel2abs($file); # get full path
54 1         14 my @dirs = File::Spec->splitdir($file);
55 1         6 $file = File::Spec->catfile(@dirs); # removes trailing '/' or equivalent
56 1 50       13 return $t if -l $file; # parent can be on other dev for links
57 1         2 pop @dirs;
58 1         6 my $dir = File::Spec->catdir(@dirs); # parent dir
59 1 50       10 $t = 'inode/mount-point' unless (stat $dir)[0] == $dev; # compare devices
60 1         10 return $t;
61             }
62 1 50       11 else { return $t ? $t : undef }
63             }
64              
65             sub globs {
66 25     25 1 42 my $file = pop;
67 25 50       69 croak 'subroutine "globs" needs a filename as argument' unless defined $file;
68 25 100       40 rehash() unless $_hashed;
69 25         263 (undef, undef, $file) = File::Spec->splitpath($file); # remove path
70 25 50       57 print STDERR "> Checking globs for basename '$file'\n" if $DEBUG;
71              
72 25 100       54 return $literal{$file} if exists $literal{$file};
73              
74 24 100       66 if ($file =~ /\.(\w+(\.\w+)*)$/) {
75 11         31 my @ext = split /\./, $1;
76 11         22 while (@ext) {
77 16         31 my $ext = join('.', @ext);
78 16 50       22 print STDERR "> Checking for extension '.$ext'\n" if $DEBUG;
79 16 50       32 carp "WARNING: wantarray behaviour of globs() will change in the future.\n" if wantarray;
80             return wantarray
81             ? ($extension{$ext}, $ext)
82             : $extension{$ext}
83 16 50       88 if exists $extension{$ext};
    100          
84 6         9 shift @ext;
85             }
86             }
87              
88 14         24 for (@globs) {
89 14 100       85 next unless $file =~ $_->[1];
90 2 50       5 print STDERR "> This file name matches \"$_->[0]\"\n" if $DEBUG;
91 2         12 return $_->[2];
92             }
93              
94 12 100       32 return globs(lc $file) if $file =~ /[A-Z]/; # recurs
95 11         33 return undef;
96             }
97              
98             sub default {
99 8     8 1 13 my $file = pop;
100 8 50       12 croak 'subroutine "default" needs a filename as argument' unless defined $file;
101              
102 8         11 my $line;
103 8 0       16 unless (ref $file) {
    50          
104 8 100       89 return undef unless -f $file;
105 7 50       14 print STDERR "> File exists, trying default method\n" if $DEBUG;
106 7 100       61 return 'text/plain' if -z $file;
107              
108 6 50       163 open FILE, '<', $file or return undef;
109 6 50       33 binmode FILE, ':utf8' unless $] < 5.008;
110 6         147 read FILE, $line, 32;
111 6         57 close FILE;
112             }
113 0         0 elsif (ref $file eq 'Path::Tiny') {
114 0 0       0 return undef unless $file->exists;
115 0 0       0 print STDERR "> File is Path::Tiny object and exists, "
116             . "trying default method\n" if $DEBUG;
117 0 0       0 open my $fh, '<', $file or return undef;
118 0 0       0 binmode FILE, ':utf8' unless $] < 5.008;
119 0         0 read $fh, $line, 32;
120 0         0 close $fh;
121             }
122             else {
123 0 0       0 print STDERR "> Trying default method on object\n" if $DEBUG;
124              
125 0         0 $file->seek(0, SEEK_SET);
126 0         0 $file->read($line, 32);
127             }
128              
129             {
130 8     8   72 no warnings; # warnings can be thrown when input not ascii
  8         14  
  8         458  
  6         12  
131 6 100 66     31 if ($] < 5.008 or ! utf8::valid($line)) {
132 8     8   4135 use bytes; # avoid invalid utf8 chars
  8         101  
  8         43  
133 2         14 $line =~ s/\s//g; # \m, \n and \t are also control chars
134 2 50       10 return 'text/plain' unless $line =~ /[\x00-\x1F\x7F]/;
135             }
136             else {
137             # use perl to do something intelligent for ascii & utf8
138 4 100       46 return 'text/plain' unless $line =~ /[^[:print:]\s]/;
139             }
140             }
141 3 50       6 print STDERR "> First 10 bytes of the file contain control chars\n" if $DEBUG;
142 3         20 return 'application/octet-stream';
143             }
144              
145             sub rehash {
146 4     4 1 448 (@globs, %literal, %extension, %mime2ext) = (); # clear all data
147 4         6 local $_; # limit scope of $_ ... :S
148             my @globfiles = @DIRS
149 4 50       33 ? ( grep {-e $_ && -r $_} map "$_/globs", @DIRS )
  1 100       26  
150             : ( reverse data_files('mime/globs') );
151 4 100       481 if (@globfiles) {
152 3         12 $_has_mimeinfo_database = 1;
153             } else {
154 1         384 carp "WARNING: You don't seem to have a mime-info database. " .
155             "The shared-mime-info package is available from http://freedesktop.org/";
156             }
157 4         11 my @done;
158 4         9 for my $file (@globfiles) {
159 5 100       12 next if grep {$file eq $_} @done;
  2         7  
160 3         7 _hash_globs($file);
161 3         8 push @done, $file;
162             }
163 4         11 $_hashed = 1;
164             }
165              
166             sub _hash_globs {
167 3     3   6 my $file = shift;
168 3   33     77 open GLOB, '<', $file || croak "Could not open file '$file' for reading" ;
169 3 50       20 binmode GLOB, ':utf8' unless $] < 5.008;
170 3         6 my ($string, $glob);
171 3         85 while () {
172 33 100 66     149 next if /^\s*#/ or ! /\S/; # skip comments and empty lines
173 30         55 chomp;
174 30         81 ($string, $glob) = split /:/, $_, 2;
175 30 100       107 unless ($glob =~ /[\?\*\[]/) { $literal{$glob} = $string }
  3 100       11  
176 0         0 elsif ($glob =~ /^\*\.(\w+(\.\w+)*)$/) {
177 24 100       80 $extension{$1} = $string unless exists $extension{$1};
178 24 100       62 $mime2ext{$string} = [] if !defined($mime2ext{$string});
179 24         27 push @{$mime2ext{$string}}, $1;
  24         119  
180 3         8 } else { unshift @globs, [$glob, _glob_to_regexp($glob), $string] }
181             }
182 3 50       34 close GLOB || croak "Could not open file '$file' for reading" ;
183             }
184              
185             sub _glob_to_regexp {
186 7     7   7770 my $glob = shift;
187 7         25 $glob =~ s/\./\\./g;
188 7         32 $glob =~ s/([?*])/.$1/g;
189 7         16 $glob =~ s/([^\w\/\\\.\?\*\[\]])/\\$1/g;
190 7         141 qr/^$glob$/;
191             }
192              
193             sub has_mimeinfo_database {
194 1 50   1 1 267 rehash() if (!$_hashed);
195 1         3 return $_has_mimeinfo_database;
196             }
197              
198             sub extensions {
199 2     2 1 434 my $mimet = mimetype_canon(pop @_);
200 2 100       4 rehash() unless $_hashed;
201 2 50       5 my $ref = $mime2ext{$mimet} if exists $mime2ext{$mimet};
202 2 50       5 return $ref ? @{$ref} : undef if wantarray;
  1 100       6  
203 1 50       4 return $ref ? @{$ref}[0] : '';
  1         6  
204             }
205              
206             sub describe {
207 2 50   2 1 454 shift if ref $_[0];
208 2         5 my ($mt, $lang) = @_;
209 2 50       4 croak 'subroutine "describe" needs a mimetype as argument' unless $mt;
210 2         9 $mt = mimetype_canon($mt);
211 2 50       7 $lang = $LANG unless defined $lang;
212 2 100       6 my $att = $lang ? qq{xml:lang="$lang"} : '';
213 2         6 my $desc;
214             my @descfiles = @DIRS
215 2 0       10 ? ( grep {-e $_ && -r $_} map "$_/$mt.xml", @DIRS )
  0 50       0  
216             : ( reverse data_files('mime', split '/', "$mt.xml") ) ;
217 2         181 for my $file (@descfiles) {
218 2         13 $desc = ''; # if a file was found, return at least empty string
219 2   33     50 open XML, '<', $file || croak "Could not open file '$file' for reading";
220 2 50       17 binmode XML, ':utf8' unless $] < 5.008;
221 2         53 while () {
222 20 100       103 next unless m!(.*?)!;
223 2         6 $desc = $1;
224 2         3 last;
225             }
226 2 50       27 close XML || croak "Could not open file '$file' for reading";
227 2 50       7 last if $desc;
228             }
229 2         10 return $desc;
230             }
231              
232             sub mimetype_canon {
233 16     16 1 486 my $mimet = pop;
234 16 50       26 croak 'mimetype_canon needs argument' unless defined $mimet;
235 16 100       37 rehash_aliases() unless $_hashed_aliases;
236 16 100       55 return exists($aliases{$mimet}) ? $aliases{$mimet} : $mimet;
237             }
238              
239             sub rehash_aliases {
240 2     2 1 5 %aliases = _read_map_files('aliases');
241 2         14 $_hashed_aliases++;
242             }
243              
244             sub _read_map_files {
245 3     3   5 my ($name, $list) = @_;
246             my @files = @DIRS
247 3 0       12 ? ( grep {-e $_ && -r $_} map "$_/$name", @DIRS )
  0 50       0  
248             : ( reverse data_files("mime/$name") );
249 3         290 my (@done, %map);
250 3         6 for my $file (@files) {
251 6 100       13 next if grep {$_ eq $file} @done;
  3         11  
252 3   33     79 open MAP, '<', $file || croak "Could not open file '$file' for reading";
253 3 50       19 binmode MAP, ':utf8' unless $] < 5.008;
254 3         94 while (my $line = ) {
255 6 50       26 next unless $line =~ m/\S/; # skip empty lines
256 6 50       22 next if $line =~ m/^\s*#/; # skip comment lines
257 6         18 chomp $line;
258 6         30 my ($k, $v) = split m/\s+/, $line, 2;
259 6 100       12 if ($list) {
260 2 100       6 $map{$k} = [] unless $map{$k};
261 2         3 push @{$map{$k}}, $v;
  2         12  
262             }
263 4         30 else { $map{$k} = $v }
264             }
265 3         26 close MAP;
266 3         10 push @done, $file;
267             }
268 3         17 return %map;
269             }
270              
271             sub mimetype_isa {
272 5   33 5 1 14 my $parent = pop || croak 'mimetype_isa needs argument';
273 5         6 my $mimet = pop;
274 5 100 66     17 if (ref $mimet or ! defined $mimet) {
275 2         5 $mimet = mimetype_canon($parent);
276 2         3 undef $parent;
277             }
278             else {
279 3         4 $mimet = mimetype_canon($mimet);
280 3         4 $parent = mimetype_canon($parent);
281             }
282 5 100       13 rehash_subclasses() unless $_hashed_subclasses;
283              
284 5         5 my @subc;
285 5 100       10 push @subc, 'inode/directory' if $mimet eq 'inode/mount-point';
286 5 100       7 push @subc, @{$subclasses{$mimet}} if exists $subclasses{$mimet};
  2         4  
287 5 100       14 push @subc, 'text/plain' if $mimet =~ m#^text/#;
288 5 100       12 push @subc, 'application/octet-stream' unless $mimet =~ m#^inode/#;
289              
290 5 100       25 return $parent ? scalar(grep {$_ eq $parent} @subc) : @subc;
  6         18  
291             }
292              
293             sub rehash_subclasses {
294 1     1 1 2 %subclasses = _read_map_files('subclasses', 'LIST');
295 1         2 $_hashed_subclasses++;
296             }
297              
298             1;
299              
300             __END__