File Coverage

blib/lib/File/MimeInfo.pm
Criterion Covered Total %
statement 190 203 93.6
branch 114 176 64.7
condition 13 24 54.1
subroutine 24 24 100.0
pod 13 13 100.0
total 354 440 80.4


line stmt bran cond sub pod time code
1             package File::MimeInfo;
2              
3 8     8   117157 use strict;
  8         36  
  8         207  
4 8     8   35 use warnings;
  8         12  
  8         167  
5 8     8   35 use Carp;
  8         11  
  8         519  
6 8     8   89 use Fcntl 'SEEK_SET';
  8         15  
  8         318  
7 8     8   41 use File::Spec;
  8         12  
  8         196  
8 8     8   1939 use File::BaseDir qw/data_files/;
  8         5667  
  8         7203  
9             require Exporter;
10              
11             our @ISA = qw(Exporter);
12             our @EXPORT = qw(mimetype);
13             our @EXPORT_OK = qw(extensions describe globs inodetype mimetype_canon mimetype_isa);
14             our $VERSION = '0.32';
15             our $DEBUG;
16              
17             our ($_hashed, $_hashed_aliases, $_hashed_subclasses, $_has_mimeinfo_database);
18             our (@globs, %literal, %extension, %mime2ext, %aliases, %subclasses);
19             our ($LANG, @DIRS);
20             # @globs = [ [ 'glob', qr//, $mime_string ], ... ]
21             # %literal contains literal matches
22             # %extension contains extensions (globs matching /^\*(\.\w)+$/ )
23             # %mime2ext is used for looking up extension by mime type
24             # %aliases contains the aliases table
25             # %subclasses contains the subclasses table
26             # $LANG can be used to set a default language for the comments
27             # @DIRS can be used to specify custom database directories
28              
29 1     1 1 428 sub new { bless \$VERSION, shift } # what else is there to bless ?
30              
31             sub mimetype {
32 21     21 1 5564 my $file = pop;
33 21 50       47 croak 'subroutine "mimetype" needs a filename as argument' unless defined $file;
34             return
35 21   100     37 inodetype($file) ||
36             globs($file) ||
37             default($file);
38             }
39              
40             sub inodetype {
41 29     29 1 35 my $file = pop;
42 29 50       55 print STDERR "> Checking inode type\n" if $DEBUG;
43 29 100       434 lstat $file or return undef;
44 16 100       80 return undef if -f _;
45 2 0       19 my $t = (-l $file) ? 'inode/symlink' : # Win32 does not like '_' here
    0          
    0          
    0          
    50          
    100          
46             (-d _) ? 'inode/directory' :
47             (-p _) ? 'inode/fifo' :
48             (-c _) ? 'inode/chardevice' :
49             (-b _) ? 'inode/blockdevice' :
50             (-S _) ? 'inode/socket' : '' ;
51 2 100       5 if ($t eq 'inode/directory') { # compare devices to detect mount-points
52 1         3 my $dev = (stat _)[0]; # device of the node under investigation
53 1         33 $file = File::Spec->rel2abs($file); # get full path
54 1         10 my @dirs = File::Spec->splitdir($file);
55 1         7 $file = File::Spec->catfile(@dirs); # removes trailing '/' or equivalent
56 1 50       13 return $t if -l $file; # parent can be on other dev for links
57 1         2 pop @dirs;
58 1         6 my $dir = File::Spec->catdir(@dirs); # parent dir
59 1 50       11 $t = 'inode/mount-point' unless (stat $dir)[0] == $dev; # compare devices
60 1         8 return $t;
61             }
62 1 50       8 else { return $t ? $t : undef }
63             }
64              
65             sub globs {
66 25     25 1 51 my $file = pop;
67 25 50       80 croak 'subroutine "globs" needs a filename as argument' unless defined $file;
68 25 100       47 rehash() unless $_hashed;
69 25         304 (undef, undef, $file) = File::Spec->splitpath($file); # remove path
70 25 50       60 print STDERR "> Checking globs for basename '$file'\n" if $DEBUG;
71              
72 25 100       56 return $literal{$file} if exists $literal{$file};
73              
74 24 100       73 if ($file =~ /\.(\w+(\.\w+)*)$/) {
75 11         28 my @ext = split /\./, $1;
76 11         22 while (@ext) {
77 16         32 my $ext = join('.', @ext);
78 16 50       48 print STDERR "> Checking for extension '.$ext'\n" if $DEBUG;
79 16 50       25 carp "WARNING: wantarray behaviour of globs() will change in the future.\n" if wantarray;
80             return wantarray
81             ? ($extension{$ext}, $ext)
82             : $extension{$ext}
83 16 50       92 if exists $extension{$ext};
    100          
84 6         13 shift @ext;
85             }
86             }
87              
88 14         27 for (@globs) {
89 14 100       100 next unless $file =~ $_->[1];
90 2 50       5 print STDERR "> This file name matches \"$_->[0]\"\n" if $DEBUG;
91 2         12 return $_->[2];
92             }
93              
94 12 100       35 return globs(lc $file) if $file =~ /[A-Z]/; # recurs
95 11         37 return undef;
96             }
97              
98             sub default {
99 8     8 1 11 my $file = pop;
100 8 50       17 croak 'subroutine "default" needs a filename as argument' unless defined $file;
101              
102 8         11 my $line;
103 8 0       16 unless (ref $file) {
    50          
104 8 100       98 return undef unless -f $file;
105 7 50       19 print STDERR "> File exists, trying default method\n" if $DEBUG;
106 7 100       65 return 'text/plain' if -z $file;
107              
108 6 50       153 open FILE, '<', $file or return undef;
109 6 50       41 binmode FILE, ':utf8' unless $] < 5.008;
110 6         127 read FILE, $line, 32;
111 6         60 close FILE;
112             }
113 0         0 elsif (ref $file eq 'Path::Tiny') {
114 0 0       0 return undef unless $file->exists;
115 0 0       0 print STDERR "> File is Path::Tiny object and exists, "
116             . "trying default method\n" if $DEBUG;
117 0 0       0 open my $fh, '<', $file or return undef;
118 0 0       0 binmode FILE, ':utf8' unless $] < 5.008;
119 0         0 read $fh, $line, 32;
120 0         0 close $fh;
121             }
122             else {
123 0 0       0 print STDERR "> Trying default method on object\n" if $DEBUG;
124              
125 0         0 $file->seek(0, SEEK_SET);
126 0         0 $file->read($line, 32);
127             }
128              
129             {
130 8     8   64 no warnings; # warnings can be thrown when input not ascii
  8         18  
  8         497  
  6         14  
131 6 100 66     37 if ($] < 5.008 or ! utf8::valid($line)) {
132 8     8   4258 use bytes; # avoid invalid utf8 chars
  8         102  
  8         42  
133 2         15 $line =~ s/\s//g; # \m, \n and \t are also control chars
134 2 50       12 return 'text/plain' unless $line =~ /[\x00-\x1F\x7F]/;
135             }
136             else {
137             # use perl to do something intelligent for ascii & utf8
138 4 100       45 return 'text/plain' unless $line =~ /[^[:print:]\s]/;
139             }
140             }
141 3 50       8 print STDERR "> First 10 bytes of the file contain control chars\n" if $DEBUG;
142 3         21 return 'application/octet-stream';
143             }
144              
145             sub rehash {
146 4     4 1 471 (@globs, %literal, %extension, %mime2ext) = (); # clear all data
147 4         6 local $_; # limit scope of $_ ... :S
148             my @globfiles = @DIRS
149 4 50       41 ? ( grep {-e $_ && -r $_} map "$_/globs", @DIRS )
  1 100       28  
150             : ( reverse data_files('mime/globs') );
151 4 100       421 if (@globfiles) {
152 3         13 $_has_mimeinfo_database = 1;
153             } else {
154 1         514 carp "WARNING: You don't seem to have a mime-info database. " .
155             "The shared-mime-info package is available from http://freedesktop.org/";
156             }
157 4         23 my @done;
158 4         8 for my $file (@globfiles) {
159 5 100       13 next if grep {$file eq $_} @done;
  2         7  
160 3         7 _hash_globs($file);
161 3         11 push @done, $file;
162             }
163 4         12 $_hashed = 1;
164             }
165              
166             sub _hash_globs {
167 3     3   6 my $file = shift;
168 3   33     79 open GLOB, '<', $file || croak "Could not open file '$file' for reading" ;
169 3 50       23 binmode GLOB, ':utf8' unless $] < 5.008;
170 3         8 my ($string, $glob);
171 3         102 while () {
172 33 100 66     153 next if /^\s*#/ or ! /\S/; # skip comments and empty lines
173 30         63 chomp;
174 30         85 ($string, $glob) = split /:/, $_, 2;
175 30 100       114 unless ($glob =~ /[\?\*\[]/) { $literal{$glob} = $string }
  3 100       12  
176 0         0 elsif ($glob =~ /^\*\.(\w+(\.\w+)*)$/) {
177 24 100       98 $extension{$1} = $string unless exists $extension{$1};
178 24 100       64 $mime2ext{$string} = [] if !defined($mime2ext{$string});
179 24         42 push @{$mime2ext{$string}}, $1;
  24         125  
180 3         10 } else { unshift @globs, [$glob, _glob_to_regexp($glob), $string] }
181             }
182 3 50       45 close GLOB || croak "Could not open file '$file' for reading" ;
183             }
184              
185             sub _glob_to_regexp {
186 7     7   7848 my $glob = shift;
187 7         24 $glob =~ s/\./\\./g;
188 7         31 $glob =~ s/([?*])/.$1/g;
189 7         17 $glob =~ s/([^\w\/\\\.\?\*\[\]])/\\$1/g;
190 7         150 qr/^$glob$/;
191             }
192              
193             sub has_mimeinfo_database {
194 1 50   1 1 281 rehash() if (!$_hashed);
195 1         3 return $_has_mimeinfo_database;
196             }
197              
198             sub extensions {
199 2     2 1 455 my $mimet = mimetype_canon(pop @_);
200 2 100       5 rehash() unless $_hashed;
201 2 50       3 my $ref = $mime2ext{$mimet} if exists $mime2ext{$mimet};
202 2 50       5 return $ref ? @{$ref} : undef if wantarray;
  1 100       7  
203 1 50       3 return $ref ? @{$ref}[0] : '';
  1         6  
204             }
205              
206             sub describe {
207 2 50   2 1 456 shift if ref $_[0];
208 2         5 my ($mt, $lang) = @_;
209 2 50       6 croak 'subroutine "describe" needs a mimetype as argument' unless $mt;
210 2         7 $mt = mimetype_canon($mt);
211 2 50       7 $lang = $LANG unless defined $lang;
212 2 100       4 my $att = $lang ? qq{xml:lang="$lang"} : '';
213 2         2 my $desc;
214             my @descfiles = @DIRS
215 2 0       11 ? ( grep {-e $_ && -r $_} map "$_/$mt.xml", @DIRS )
  0 50       0  
216             : ( reverse data_files('mime', split '/', "$mt.xml") ) ;
217 2         170 for my $file (@descfiles) {
218 2         3 $desc = ''; # if a file was found, return at least empty string
219 2   33     49 open XML, '<', $file || croak "Could not open file '$file' for reading";
220 2 50       16 binmode XML, ':utf8' unless $] < 5.008;
221 2         37 while () {
222 20 100       117 next unless m!(.*?)!;
223 2         5 $desc = $1;
224 2         4 last;
225             }
226 2 50       28 close XML || croak "Could not open file '$file' for reading";
227 2 50       6 last if $desc;
228             }
229 2         12 return $desc;
230             }
231              
232             sub mimetype_canon {
233 16     16 1 482 my $mimet = pop;
234 16 50       29 croak 'mimetype_canon needs argument' unless defined $mimet;
235 16 100       43 rehash_aliases() unless $_hashed_aliases;
236 16 100       57 return exists($aliases{$mimet}) ? $aliases{$mimet} : $mimet;
237             }
238              
239             sub rehash_aliases {
240 2     2 1 5 %aliases = _read_map_files('aliases');
241 2         15 $_hashed_aliases++;
242             }
243              
244             sub _read_map_files {
245 3     3   4 my ($name, $list) = @_;
246             my @files = @DIRS
247 3 0       14 ? ( grep {-e $_ && -r $_} map "$_/$name", @DIRS )
  0 50       0  
248             : ( reverse data_files("mime/$name") );
249 3         296 my (@done, %map);
250 3         7 for my $file (@files) {
251 6 100       12 next if grep {$_ eq $file} @done;
  3         12  
252 3   33     81 open MAP, '<', $file || croak "Could not open file '$file' for reading";
253 3 50       22 binmode MAP, ':utf8' unless $] < 5.008;
254 3         86 while (my $line = ) {
255 6 50       27 next unless $line =~ m/\S/; # skip empty lines
256 6 50       24 next if $line =~ m/^\s*#/; # skip comment lines
257 6         14 chomp $line;
258 6         30 my ($k, $v) = split m/\s+/, $line, 2;
259 6 100       16 if ($list) {
260 2 100       6 $map{$k} = [] unless $map{$k};
261 2         4 push @{$map{$k}}, $v;
  2         13  
262             }
263 4         30 else { $map{$k} = $v }
264             }
265 3         29 close MAP;
266 3         11 push @done, $file;
267             }
268 3         19 return %map;
269             }
270              
271             sub mimetype_isa {
272 5   33 5 1 11 my $parent = pop || croak 'mimetype_isa needs argument';
273 5         6 my $mimet = pop;
274 5 100 66     18 if (ref $mimet or ! defined $mimet) {
275 2         4 $mimet = mimetype_canon($parent);
276 2         4 undef $parent;
277             }
278             else {
279 3         6 $mimet = mimetype_canon($mimet);
280 3         6 $parent = mimetype_canon($parent);
281             }
282 5 100       10 rehash_subclasses() unless $_hashed_subclasses;
283              
284 5         8 my @subc;
285 5 100       9 push @subc, 'inode/directory' if $mimet eq 'inode/mount-point';
286 5 100       9 push @subc, @{$subclasses{$mimet}} if exists $subclasses{$mimet};
  2         4  
287 5 100       16 push @subc, 'text/plain' if $mimet =~ m#^text/#;
288 5 100       11 push @subc, 'application/octet-stream' unless $mimet =~ m#^inode/#;
289              
290 5 100       17 return $parent ? scalar(grep {$_ eq $parent} @subc) : @subc;
  6         17  
291             }
292              
293             sub rehash_subclasses {
294 1     1 1 2 %subclasses = _read_map_files('subclasses', 'LIST');
295 1         2 $_hashed_subclasses++;
296             }
297              
298             1;
299              
300             __END__