File Coverage

blib/lib/Mac/Alias/Parse.pm
Criterion Covered Total %
statement 141 153 92.1
branch 74 108 68.5
condition 2 6 33.3
subroutine 13 13 100.0
pod 0 6 0.0
total 230 286 80.4


line stmt bran cond sub pod time code
1             #
2             # A tiny pure-perl Mac Alias record class.
3             # Based on an unattributed text file found floating around
4             # on the internet, plus further research.
5             #
6              
7             package Mac::Alias::Parse;
8              
9             =head1 NAME
10              
11             Mac::Alias::Parse - Parse and create Mac Alias records
12              
13             =head1 SYNOPSIS
14              
15             use Mac::Alias::Parse;
16              
17             $fields = Mac::Alias::Parse::unpack_alias( $bytes );
18             $filename = $fields->{target}->{long_name};
19            
20             $bytes = Mac::Alias::Parse::pack_alias(
21             target => { inode => ..., long_name => ..., createdUTC => ..., ... },
22             folder => { ... },
23             inode_path => [ ... ],
24             ...
25             );
26              
27             =head1 DESCRIPTION
28              
29             The functions C and C convert between an
30             alias record, as found in various Mac data structures or on disk, and
31             an easier-to-manipulate Perl data structure.
32              
33             =cut
34              
35              
36             # Excerpt from text file:
37              
38             # ALIAS RECORD STRUCTURE
39             # A basic record is 150 bytes in length excluding extra info. The Mac
40             # OS uses colons in file paths instead of forward slashs as used in
41             # URLs, so the colon can't be used in file, directory nor disk
42             # names. Also directorys and files have a Mac OS name limit of 31
43             # characters. Disks have a limit of 27 characters.
44              
45             # (end excerpt)
46              
47             # The alias record starts with a length word; it is also self-delimiting
48             # (the last entry in the "Extra" list is a sentinel). This might be
49             # a relic of earlier extension efforts (fields being added to the end
50             # of the fixed structure, before the "Extra" stuff was implemented) or
51             # it might just be a processing convenience--- not sure.
52              
53             # The fixed part of the alias record looks like this:
54             #
55             # offs len what
56             # 6 2 Alias record version (we understand version 2)
57             # 8 2 Kind of item pointed to (0=file, 1=folder)
58             # 10 1 Length of volume name
59             # 11 27 Volume name (padded with NULs)
60             # 38 4 Volume creation date (seconds since Mac epoch, in local timezone)
61             # 42 2 Filesystem type ("volume signature", eg kHFSPlusSigWord)[1]
62             # 44 2 Volume type [2]
63             # 46 4 Containing folder's File Number (inode)
64             # 50 1 Filename length
65             # 51 63 Filename (padded with NULs)
66             # 114 4 Destination item's File Number (inode)
67             # 118 4 Item's creation date (seconds since Mac epoch)
68             # 122 4 Item's creator (FourCharCode)
69             # 126 4 Item's type (FourCharCode)
70             # 130 2 Number of levels From [3]
71             # 132 2 Number of levels To [3]
72             # 134 4 Volume attribute flags [???]
73             # 138 2 Volume file system ID (???, typically 0, or 'cu' for network mounts)
74             # 140 10 Reserved (set to zeroes)
75              
76             # The fixed part is followed by a series of "extra" fields in a tag-length-
77             # value style:
78              
79             # 0 2 Record type/tag (-1 / 65535 indicates end of list)
80             # 2 2 Length of data field
81             # 4 . Data
82             # . 0/1 Optional pad with 0 byte to even byte boundary
83              
84             # Record types/tags:
85              
86             # 0: Folder name (Carbon-mangled)
87             # 1: Inode-path to containing folder
88             # 2: Carbon pathname of file
89             # 3: AppleShare zone name [4]
90             # 4: AppleShare server name [4]
91             # 5: AppleShare user name [4]
92             # 6: Driver name [4]
93             # 9: Network mount info
94             # 10: AppleRemoteAccess dialup info [4]
95             # 14: Unicode filename [5]
96             # 15: Unicode volume name [5]
97             # 16: High-resolution date: volume creation date
98             # 17: High-resolution date: file creation date
99             # 18: POSIX path to file, treating volume root as /
100             # 19: POSIX path of volume mount point
101             # 20: Recursive alias record of volume's disk image file
102             # 21: Length of prefix of POSIX path which is user's home directory
103              
104             # The folder name (record 0) is mangled to fit in the 31-byte
105             # System 7 HFS-not-plus limit.
106              
107             # The inode-path (record 1) contains a sequence of 4-byte inode numbers,
108             # starting with the containing folder and continuing up to the volume's
109             # root. (The root isn't included; if the containing folder *is* the root,
110             # this is a zero-length list.) The first value is the same as the folder's
111             # inode in the fixed portion of the record, if both exist.
112              
113             # The network mount info record appears to contain a network mount type,
114             # flags word, and URL of the mount point.
115              
116             # The high-resolution dates (16 and 17) seem to be normal Mac-epoch dates
117             # scaled by 2^16. In practice the fractional seconds always seem to
118             # be zero. Not clear what they are the dates of.
119              
120             # [1] For reference on volume data types and magic numbers, see TN1150.
121             # [2] From the textfile: Fixed HD = 0; Network Disk = 1; 400kB FD = 2;
122             # 800kB FD = 3; 1.4MB FD = 4; Other Ejectable Media = 5
123             # [3] From and To are unclear to me. If unspecified they are -1 (65535),
124             # and in the aliases I've examined they are always -1. The textfile
125             # describes them as the number of "directories from alias thru to root"
126             # and "directories from root thru to source".
127             # [4] From textfile.
128             # [5] These contain a (redundant?) 2-byte length followed by UTF-16-BE data.
129              
130             # If an inode/fileID is missing (e.g. some network filesystems) it
131             # is stored as 0xFFFFFFFF.
132              
133 2     2   87342 use strict;
  2         6  
  2         78  
134 2     2   12 use Exporter ( );
  2         4  
  2         43  
135 2     2   11 use Carp ( 'carp', 'croak' );
  2         9  
  2         137  
136 2     2   2098 use Encode;
  2         26360  
  2         200  
137 2     2   3666 use Math::BigInt;
  2         64536  
  2         15  
138 2     2   46055 use Math::BigFloat;
  2         48204  
  2         17  
139 2     2   7107 use Unicode::Normalize ( 'NFD', 'NFC' );
  2         5529  
  2         4802  
140              
141             our $VERSION = '0.20';
142             our @ISA = 'Exporter';
143             our @EXPORT_OK = qw( &unpack_alias &pack_alias );
144              
145             sub unpack_alias {
146 2     2 0 2783 my($bytes) = @_;
147 2         4 my(%into, %vol, %dir, %targ, $appinfo, $recsize, $version,
148             $file_length, $file_name, $vol_length, $vol_name, $extra_ptr,
149             @extra);
150              
151 2         16 ($appinfo, $recsize, $version) = unpack('a4 nn', $bytes);
152              
153 2 50 33     19 warn 'Alias record is truncated'
154             if ($recsize > length($bytes) || 150 > length($bytes));
155            
156 2 50       9 warn "Unexpected alias record version (found $version, expected 2)\n"
157             if ($version != 2);
158              
159             # Unpack the fixed-length portion of the alias record.
160             (
161 2         40 $targ{'kind'},
162             $vol_length, $vol_name,
163             @vol{qw( created signature type )},
164             $dir{'inode'},
165             $file_length, $file_name,
166             @targ{qw( inode created type creator )},
167             @into{qw( xfrom xto )},
168             @vol{qw( flags fsid )},
169             $into{'reserved'},
170             ) = unpack('x8 n C a27 N a2 nN C a63 NN a4 a4 nnNa2 a10', $bytes);
171 2         6 $extra_ptr = 150;
172              
173 2         5 $vol{'name'} = substr($vol_name, 0, $vol_length);
174 2         6 $targ{'name'} = substr($file_name, 0, $file_length);
175            
176 2         4 $into{'volume'} = \%vol;
177 2         5 $into{'folder'} = \%dir;
178 2         3 $into{'target'} = \%targ;
179            
180             # Remove fields with known "missing value" values
181 2 50       7 $into{'appinfo'} = $appinfo unless $appinfo eq "\x00\x00\x00\x00";
182 2 50       7 delete $into{'xfrom'} if $into{'xfrom'} == 65535;
183 2 50       6 delete $into{'xto'} if $into{'xto'} == 65535;
184 2 50       9 delete $targ{'creator'} if $targ{'creator'} eq "\x00\x00\x00\x00";
185 2 50       7 delete $targ{'type'} if $targ{'type'} eq "\x00\x00\x00\x00";
186 2 50       6 delete $targ{'inode'} if $targ{'inode'} eq 0xFFFFFFFF;
187 2 50       9 delete $into{'reserved'} if $into{'reserved'} eq ( "\x00" x 10 );
188 2 50       7 delete $vol{'fsid'} if $vol{'fsid'} eq "\x00\x00";
189 2 50       7 delete $dir{'inode'} if $dir{'inode'} eq 0xFFFFFFFF;
190              
191             # If the extra tag-length-value section exists, parse it
192 2 50       5 if (length($bytes) > $extra_ptr) {
193 2         12 my(@extra);
194            
195 2         7 while(length($bytes) >= (4+$extra_ptr)) {
196            
197             # Extract the next record
198 22         5805 my($t, $l) = unpack('nn', substr($bytes, $extra_ptr, 4));
199 22 100       39 last if $t == 65535;
200 20         34 my($f) = substr($bytes, 4+$extra_ptr, $l);
201 20         22 $extra_ptr += 4 + $l;
202 20 100       36 $extra_ptr ++ if ( $l % 2 ) != 0;
203            
204             # Parse a few known record types.
205 20 100       109 if ($t == 0) {
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    50          
206 2         8 $dir{'name'} = $f;
207             } elsif ($t == 1) {
208 2         10 $into{'inode_path'} = [ unpack('N*', $f) ];
209             } elsif ($t == 2) {
210 2         5 $into{'carbon_path'} = $f;
211             } elsif ($t == 9) {
212             # Unknown format, but known to be volume info.
213 0         0 $vol{'9'} = $f;
214             } elsif ($t == 14) {
215 2         7 $targ{'long_name'} = &unpackUC($f);
216             } elsif ($t == 15) {
217 2         7 $vol{'long_name'} = &unpackUC($f);
218             } elsif ($t == 16) {
219 2         14 $vol{'createdUTC'} = &unpackLongTime($f);
220             } elsif ($t == 17) {
221 2         5 $targ{'createdUTC'} = &unpackLongTime($f);
222             } elsif ($t == 18) {
223 2         7 $into{'posix_path'} = $f;
224             } elsif ($t == 19) {
225 2         9 $vol{'posix_path'} = $f;
226             } elsif ($t == 20) {
227 0         0 $vol{'alias'} = &unpack_alias($f);
228             } elsif ($t == 21) {
229 2         9 $into{'posix_homedir_length'} = unpack('n', $f);
230             } else {
231 0         0 push(@extra, $t, $f);
232             }
233             }
234            
235 2 50       11 $into{'extra'} = \@extra if @extra;
236             }
237              
238 2         12 \%into;
239             }
240              
241             sub pack_alias {
242 1     1 0 2017 my(%alis) = @_;
243              
244             # Extract the hashes into local copies so we can
245             # remove entries as we process them.
246 1         3 my(%vol, %dir, %targ);
247 1 50       7 %vol = %{ delete $alis{'volume'} } if exists $alis{'volume'};
  1         13  
248 1 50       7 %dir = %{ delete $alis{'folder'} } if exists $alis{'folder'};
  1         5  
249 1 50       5 %targ = %{ delete $alis{'target'} } if exists $alis{'target'};
  1         7  
250              
251 1         2 my($k, $i, @extra, $extra);
252              
253             # Populate the fixed-length portion of the record.
254 1         14 my(@fixed) = (
255             (delete $alis{'appinfo'}), undef, 2,
256             (delete $targ{'kind'}),
257             undef, (delete($vol{'name'})),
258             (delete @vol{qw( created signature type )}),
259             (delete $dir{'inode'}),
260             undef, (delete($targ{'name'})),
261             (delete @targ{qw( inode created type creator )}),
262             (delete @alis{qw( xfrom xto )}),
263             (delete @vol{qw( flags fsid )}),
264             (delete $alis{'reserved'})
265             );
266              
267             # Fail if any required info is missing.
268 1         11 my(%required) = (
269             3 => 'target->{"kind"}',
270             5 => 'volume->{"name"}',
271             7 => 'volume->{"signature"}',
272             8 => 'volume->{"type"}',
273             11 => 'target->{"name"}'
274             );
275 1         3 foreach $k (keys %required) {
276 5 50       16 croak "Missing value ".$required{$k}
277             unless defined($fixed[$k]);
278             }
279              
280 1         3 $fixed[4] = length($fixed[5]);
281 1         2 $fixed[10] = length($fixed[11]);
282              
283             # Fill in any missing values with their appropriate markers.
284 1         4 my($fc0) = "\x00\x00\x00\x00"; # FourCharCode all zeros
285 1         9 my(@missings) = (
286             $fc0, undef, 2,
287             undef, undef, undef,
288             0, undef, 5,
289             0xFFFFFFFF,
290             undef, undef,
291             0xFFFFFFFF, 0, $fc0, $fc0,
292             0xFFFF, 0xFFFF,
293             0, "\x00\x00",
294             ( "\x00" x 10 )
295             );
296 1 50 33     9 die unless (21 == @fixed) and (@fixed == @missings);
297 1         5 for($i = 0; $i < 21; $i++) {
298 21 100       57 $fixed[$i] = $missings[$i] if !defined $fixed[$i];
299             }
300            
301             # Process any remaining keys into the 'extra' array.
302 1         3 @extra = ();
303 1         4 foreach $k (keys %alis) {
304 4         58 my($v) = $alis{$k};
305 4 100       20 if ($k eq 'inode_path') {
    100          
    100          
    50          
    0          
306 1         9 push(@extra, 1, pack('N*', @$v));
307             } elsif ($k eq 'carbon_path') {
308 1         4 push(@extra, 2, $v);
309             } elsif ($k eq 'posix_path') {
310 1         5 push(@extra, 18, Encode::encode('utf8', $v));
311             } elsif ($k eq 'posix_homedir_length') {
312 1         5 push(@extra, 21, pack('n', $v));
313             } elsif ($k eq 'extra') {
314 0         0 push(@extra, @$v);
315             } else {
316 0         0 carp "Unrecognized alias key \"$k\"";
317             }
318             }
319 1         4 foreach $k (keys %vol) {
320 3         31 my($v) = $vol{$k};
321 3 100       14 if ($k eq 'long_name') {
    100          
    50          
    50          
    0          
322 1         4 push(@extra, 15, &packUC($v));
323             } elsif ($k eq 'posix_path') {
324 1         6 push(@extra, 19, Encode::encode('utf8', $v));
325             } elsif ($k eq 'alias') {
326 0         0 push(@extra, 20, &pack_alias(%$v));
327             } elsif ($k eq 'createdUTC') {
328 1         4 push(@extra, 16, &packLongTime($v));
329             } elsif ($k eq '9') {
330             # Unknown format, but known to be volume info.
331 0         0 push(@extra, 9, $v);
332             } else {
333 0         0 carp "Unrecognized alias key volume->{\"$k\"}";
334             }
335             }
336 1         4 foreach $k (keys %dir) {
337 1         3 my($v) = $dir{$k};
338 1 50       7 if ($k eq 'name') {
339 1         3 push(@extra, 0, $v);
340             } else {
341 0         0 carp "Unrecognized alias key folder->{\"$k\"}";
342             }
343             }
344 1         4 foreach $k (keys %targ) {
345 2         4 my($v) = $targ{$k};
346 2 100       8 if ($k eq 'long_name') {
    50          
347 1         5 push(@extra, 14, &packUC($v));
348             } elsif ($k eq 'createdUTC') {
349 1         3 push(@extra, 17, &packLongTime($v));
350             } else {
351 0         0 carp "Unrecognized alias key target->{\"$k\"}";
352             }
353             }
354            
355 1         3 $extra = '';
356 1 50       5 if (@extra) {
357 1         3 push(@extra, 0xFFFF, '');
358              
359 1         3 while(@extra) {
360 11         14 my($t) = shift @extra;
361 11         13 my($v) = shift @extra;
362 11         22 $extra .= pack('nn', $t, length($v)) . $v;
363 11 100       35 if ((length($v) % 2) == 1) {
364 1         3 $extra .= "\x00";
365             }
366             }
367              
368             }
369              
370 1         4 $fixed[1] = 150 + length($extra);
371            
372 1         17 return pack('a4nnn Ca27 Na2nN Ca63 NNa4a4 nnNa2 a10', @fixed) . $extra;
373             }
374              
375             sub unpackUC {
376 7     7 0 1103 my($buf) = @_;
377              
378 7         23 my($count) = unpack('n', $buf);
379 7         21 my($bufsz) = (length($buf) - 2) / 2;
380 7 50       21 warn "Unicode string has unexpected count (count=$count, expecting $bufsz)\n"
381             if ($count != $bufsz);
382 7         38 return Encode::decode('utf-16be', substr($buf, 2));
383             }
384              
385             sub packUC {
386 5     5 0 8537 my($str) = @_;
387 5         52 my($bytes) = Encode::encode('utf-16be', NFD($str));
388 5         261 return pack('n', length($bytes)/2) . $bytes;
389             }
390              
391             sub unpackLongTime {
392             # Precise times are stored in 48.16-fixed-point time format
393             # This corresponds to the UTCDateTime format.
394             # It represents the number of seconds (and fractional seconds)
395             # since the Mac epoch of Jan 1, 1904.
396             # The offset from the common POSIX epoch is 2082844800 seconds.
397 9     9 0 31 my($h, $m, $l) = unpack('nNn', $_[0]);
398 9         32 my($t);
399            
400 9 100       24 if ($h == 0) {
401 7         10 $t = $m;
402             } else {
403 2         18 $t = from_hex Math::BigInt '0x'.unpack('H*', substr($_[0], 0, 6));
404             }
405              
406 9 100       341 return $t if ($l == 0);
407            
408 2         19 $l = new Math::BigFloat $l;
409 2         309 $l->precision(-5);
410 2         147 $l->bdiv(65536);
411 2         1227 $l->badd($t);
412 2         872 return $l;
413             }
414              
415             sub packLongTime {
416 7     7 0 546 my($str) = @_;
417 7         12 my(@x);
418              
419 7 50       128 if (@x = ($str =~ /^(\d+):(\d+):(\d+)$/)) {
    100          
    50          
420 0         0 return pack('nNn', @x);
421             } elsif ($str =~ /^(\d+)(\.\d+)$/) {
422 2         32 return pack('nNn', 0, int($1), 65536 * ('0' . $2));
423             } elsif ($str =~ /^\d+$/) {
424 5         39 return pack('nNn', 0, $str, 0);
425             } else {
426 0           croak "Cannot pack \"$str\" into 48.16-bit time";
427             }
428             }
429              
430             =head1 CREDITS
431              
432             The initial information about the structure of alias records was derived
433             from an unattributed text file found in various places on the internet.
434              
435             Perl implementation and additional format investigation by Wim Lewis.
436              
437             =head1 COPYRIGHT
438              
439             Copyright 2011-2013, Wim Lewis Ewiml@hhhh.orgE
440              
441             This software is available under the same terms as perl.
442              
443             =cut
444              
445             1;