File Coverage

blib/lib/Mac/PropertyList/WriteBinary.pm
Criterion Covered Total %
statement 147 156 94.2
branch 42 50 84.0
condition n/a
subroutine 27 28 96.4
pod 0 1 0.0
total 216 235 91.9


line stmt bran cond sub pod time code
1 2     2   110310 use v5.10;
  2         12  
2              
3             package Mac::PropertyList::WriteBinary;
4 2     2   11 use strict;
  2         4  
  2         40  
5 2     2   8 use warnings;
  2         4  
  2         48  
6              
7 2     2   488 use Encode ();
  2         8241  
  2         34  
8 2     2   395 use Mac::PropertyList ();
  2         5  
  2         47  
9 2     2   10 use Math::BigInt;
  2         4  
  2         20  
10 2     2   602 use Exporter qw(import);
  2         4  
  2         179  
11              
12             =encoding utf8
13              
14             =head1 NAME
15              
16             Mac::PropertyList::WriteBinary - pack data into a Mac "binary property list"
17              
18             =head1 SYNOPSIS
19              
20             use Mac::PropertyList::WriteBinary;
21              
22             my $data = Mac::PropertyList::dict->new( { ... => ... } );
23             my $buf = Mac::PropertyList::WriteBinary::as_string($data);
24              
25             =head1 DESCRIPTION
26              
27             The C function converts a property list structure
28             (composed of instances of C,
29             C, etc.) into a binary format compatible
30             with the Apple CoreFoundation binary property list functions.
31              
32             It takes a single argument, the top-level object to write, and returns
33             a byte string.
34              
35             The property list can contain the following perl objects:
36              
37             =over 4
38              
39             =item C value objects
40              
41             These are written according to their class.
42              
43             =item Unblessed references to Perl lists and hashes
44              
45             These are written as arrays and dictionaries, respectively.
46              
47             =item Perl scalars
48              
49             All Perl scalars are written as strings; this is similar to the behavior
50             of writing an oldstyle OpenStep property list, which does not
51             distinguish between numbers and strings, and then reading it using
52             CoreFoundation functions.
53              
54             =item C
55              
56             This is written as the null object. CoreFoundation will read this as
57             C, but appears to be unable to write it.
58              
59             =back
60              
61             Strings are uniqued (two equal strings will be written as references
62             to the same object). If the same reference appears more than once in
63             the structure, it will likewise only be represented once in the
64             output. Although the bplist format can represent circular data
65             structures, they cannot be written by this module (they will be
66             detected and result in an error — they wouldn't be read correctly by
67             CoreFoundation anyway, so you aren't missing much).
68              
69             =head1 BUGS
70              
71             C objects are not handled yet.
72              
73             Objects other than strings (and null) are not uniqued by value,
74             only by reference equality. This may change in a future version.
75              
76             Perl's dictionary keys can only be strings, but a bplist's can be
77             any scalar object.
78              
79             There is no way to write the C objects used by the keyed archiver.
80              
81             Perls that do not use IEEE-754 format internally for floating point
82             numbers will produce incorrect output.
83              
84             =cut
85              
86             use constant {
87             header => 'bplist00',
88              
89             tagInteger => 0x10,
90             tagFloat => 0x20,
91             tagDate => 0x30,
92             tagData => 0x40,
93             tagASCII => 0x50,
94             tagUTF16 => 0x60,
95             tagUID => 0x80,
96             tagArray => 0xA0,
97             tagSet => 0xC0,
98             tagDict => 0xD0,
99              
100             # If we can actually represent an integer close to 2^64 with full
101             # precision and pack it with 'Q', then we can use that
102 2 50       3 havePack64 => ( eval { pack('Q>', 1153202979583557643) eq "\x10\x01\0\0\0\0\0\x0B" } ? 1 : 0 ),
  2         2566  
103 2     2   12 };
  2         4  
104              
105             our $VERSION = '1.504';
106             our @EXPORT_OK = qw( as_string );
107              
108             sub as_string {
109 40     40 0 1754 my($value) = @_;
110 40         76 my($ctxt) = _create_fragments($value);
111 38         47 my(@offsets, $xref_offset, $offset_size);
112              
113             # The header (magic number and version, which is 00)
114 38         54 my($buf) = header;
115              
116             # Write each fragment, making note of its offset in the file
117 38         88 foreach my $objid (0 .. $ctxt->{nextid}-1) {
118 58         82 $offsets[$objid] = length $buf;
119 58         129 $buf .= $ctxt->{fragments}->{$objid};
120             }
121              
122             # ... and the offset of the beginning of the offsets table
123 38         48 $xref_offset = length $buf;
124              
125             # Figure out how many bytes to use to represent file offsets,
126             # and append the offset table
127 38 100       65 if ($xref_offset < 256) {
    50          
128 37         76 $buf .= pack('C*', @offsets);
129 37         45 $offset_size = 1;
130             } elsif ($xref_offset < 65536) {
131 1         3 $buf .= pack('n*', @offsets);
132 1         2 $offset_size = 2;
133             } else {
134 0         0 $buf .= pack('N*', @offsets);
135 0         0 $offset_size = 4;
136             }
137              
138             # Write the file trailer
139             $buf .= pack('x5 CCC ' . ( havePack64? 'Q>' : 'x4N' ) x 3,
140             0, $offset_size, $ctxt->{objref_size},
141 38         147 $ctxt->{nextid}, $ctxt->{rootid}, $xref_offset);
142              
143 38         171 $buf;
144             }
145              
146             # sub to_file {
147             # To consider:
148             # It might be useful to have a version of &as_string which writes
149             # the fragments directly to a file handle without having to build a
150             # single large buffer in RAM. This would be more efficient for
151             # larger structures. On the other hand, if you're writing large
152             # structures with this module, you're already suffering needlessly,
153             # so perhaps it's not worth optimizing overmuch for that case.
154             # }
155              
156              
157             # _assign_id is the workhorse function which recursively
158             # descends the data structure and assigns object ids to each node,
159             # as well as creating fragments of the final file.
160             sub _assign_id {
161 82     82   126 my($context, $value) = @_;
162              
163             # The type of this value
164 82         105 my($tp) = ref $value;
165              
166             # Unblessed scalars are either strings or undef.
167 82 100       156 if ($tp eq '') {
168 24 100       42 if (!defined $value) {
169             $context->{nullid} = $context->{nextid} ++
170 2 50       8 unless defined $context->{nullid};
171 2         6 return $context->{nullid};
172             } else {
173             $context->{strings}->{$value} = $context->{nextid} ++
174 22 100       57 unless exists $context->{strings}->{$value};
175 22         54 return $context->{strings}->{$value};
176             }
177             }
178              
179             # If we reach here we know that $value is a ref. Keep a table of
180             # stringified refs, so that we can re-use the id of an object
181             # we've seen before.
182 58 100       145 if(exists $context->{refs}{$value}) {
183 4         8 my($thisid) = $context->{refs}->{$value};
184 4 100       15 die "Recursive data structure\n" unless defined $thisid;
185 3         8 return $thisid;
186             }
187 54         121 $context->{refs}->{$value} = undef;
188              
189             # Serialize the object into $fragment if possible. Since we
190             # don't yet know how many bytes we will use to represent object
191             # ids in the final file, don't serialize those yet–keep them
192             # as a list of integers for now.
193 54         71 my($fragment, @objrefs);
194              
195 54 100       228 if($tp eq 'ARRAY') {
    100          
    100          
196 4         9 $fragment = _counted_header(tagArray, scalar @$value);
197 4         8 @objrefs = map { $context->_assign_id($_) } @$value;
  9         17  
198             } elsif($tp eq 'HASH') {
199 3         13 my(@ks) = sort (CORE::keys %$value);
200 3         8 $fragment = _counted_header(tagDict, scalar @ks);
201 6         10 @objrefs = ( ( map { $context->_assign_id($_) } @ks ),
202 3         6 ( map { $context->_assign_id($value->{$_}) } @ks ) );
  6         12  
203             } elsif(UNIVERSAL::can($tp, '_as_bplist_fragment')) {
204 46         93 ($fragment, @objrefs) = $value->_as_bplist_fragment($context);
205             } else {
206 1         11 die "Cannot serialize type '$tp'\n";
207             }
208              
209             # As a special case, a fragment of 'undef' indicates that
210             # the object ID was already assigned.
211 50 100       115 return $objrefs[0] if !defined $fragment;
212              
213             # Assign the next object ID to this object.
214 44         68 my($thisid) = $context->{nextid} ++;
215 44         80 $context->{refs}->{$value} = $thisid;
216              
217             # Store the fragment and unpacked object references (if any).
218 44         79 $context->{fragments}->{$thisid} = $fragment;
219 44 100       94 $context->{objrefs}->{$thisid} = \@objrefs if @objrefs;
220              
221 44         107 return $thisid;
222             }
223              
224             sub _create_fragments {
225 40     40   55 my ($value) = @_;
226              
227             # Set up the state needed by _assign_id
228              
229 40         141 my ($ctxt) = bless({
230             nextid => 0, # The next unallocated object ID
231             nullid => undef, # The object id of 'null'
232             strings => { }, # Maps string values to object IDs
233             refs => { }, # Maps stringified refs to object IDs
234             fragments => { }, # Maps object IDs to bplist fragments, except object lists
235             objrefs => { }, # Maps object IDs to objref lists
236             });
237              
238             # Traverse the data structure, and remember the id of the root object
239 40         80 $ctxt->{rootid} = $ctxt->_assign_id($value);
240              
241             # Figure out how many bytes to use to represent an object id.
242 38         49 my ($objref_pack);
243 38 50       62 if ($ctxt->{nextid} < 256) {
    0          
244 38         48 $objref_pack = 'C*';
245 38         86 $ctxt->{objref_size} = 1;
246             } elsif ($ctxt->{nextid} < 65536) {
247 0         0 $objref_pack = 'n*';
248 0         0 $ctxt->{objref_size} = 2;
249             } else {
250 0         0 $objref_pack = 'N*';
251 0         0 $ctxt->{objref_size} = 4;
252             }
253              
254 38         82 my($objid, $reflist, $stringval);
255              
256             # Append the unformatted object ids to their corresponding fragments,
257             # now that we know how to pack them.
258 38         44 while (($objid, $reflist) = each %{$ctxt->{objrefs}}) {
  46         135  
259 8         19 $ctxt->{fragments}->{$objid} .= pack($objref_pack, @$reflist);
260             }
261 38         81 delete $ctxt->{objrefs};
262              
263             # Create fragments for all the strings.
264             # TODO: If &to_file is written, it would be worth
265             # breaking this out so that the conversion can be done on the
266             # fly without keeping all of the converted strings in memory.
267             {
268 38         53 my($ascii) = Encode::find_encoding('ascii');
  38         92  
269 38         440 my($utf16be) = Encode::find_encoding('UTF-16BE');
270              
271 38         2836 while (($stringval, $objid) = each %{$ctxt->{strings}}) {
  50         134  
272 12         15 my($fragment);
273              
274             # Strings may be stored as ASCII (7 bits) or UTF-16-bigendian.
275 12 100       42 if ($stringval =~ /\A[\x01-\x7E]*\z/s) {
276             # The string is representable in ASCII.
277 9         36 $fragment = $ascii->encode($stringval);
278 9         18 $fragment = _counted_header(tagASCII, length $fragment) . $fragment;
279             } else {
280 3         28 $fragment = $utf16be->encode($stringval);
281 3         9 $fragment = _counted_header(tagUTF16, (length $fragment)/2) . $fragment;
282             }
283              
284 12         34 $ctxt->{fragments}->{$objid} = $fragment;
285             }
286             }
287              
288             # If there's a in the file, create its fragment.
289             $ctxt->{fragments}->{$ctxt->{nullid}} = "\x00"
290 38 100       92 if defined $ctxt->{nullid};
291              
292 38         59 $ctxt;
293             }
294              
295             sub _counted_header {
296 28     28   48 my ($typebyte, $count) = @_;
297              
298             # Datas, strings, and container objects have a count/size encoded
299             # in the lower 4 bits of their type byte. If the count doesn't fit
300             # in 4 bits, the bits are set to all-1s and the actual value
301             # follows, encoded as an integer (including the integer's
302             # own type byte).
303              
304 28 100       46 if ($count < 15) {
305 25         76 return pack('C', $typebyte + $count);
306             } else {
307 3         10 return pack('C', $typebyte + 15) . &_pos_integer($count);
308             }
309             }
310              
311             sub _pos_integer {
312 16     16   26 my($count) = @_;
313              
314 16 100       30 if ($count < 256) {
    100          
    50          
315 12         39 return pack('CC', tagInteger + 0, $count);
316             } elsif ($count < 65536) {
317 3         12 return pack('CS>', tagInteger + 1, $count);
318             } elsif (havePack64 && ($count > 4294967295)) {
319 0         0 return pack('Cq>', tagInteger + 3, $count);
320             } else {
321 1         5 return pack('CN', tagInteger + 2, $count);
322             }
323             }
324              
325             package Mac::PropertyList::array;
326              
327             sub _as_bplist_fragment {
328 2     2   10 my($context, @items) = ( $_[1], $_[0]->value );
329 2         6 @items = map { $context->_assign_id($_) } @items;
  3         5  
330              
331 2         5 return ( Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagArray, scalar @items),
332             @items );
333             }
334              
335             package Mac::PropertyList::dict;
336              
337             sub _as_bplist_fragment {
338 4     4   8 my($self, $context) = @_;
339 4         12 my($value) = scalar $self->value; # Returns a ref in scalar context
340 4         17 my(@keys) = sort (CORE::keys %$value);
341              
342             return ( Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagDict, scalar @keys),
343 6         13 ( map { $context->_assign_id($_) } @keys ),
344 4         9 ( map { $context->_assign_id($value->{$_}) } @keys ));
  6         12  
345              
346             }
347              
348             package Mac::PropertyList::date;
349              
350 2     2   18 use Scalar::Util ( 'looks_like_number' );
  2         4  
  2         131  
351 2     2   967 use Time::Local ( 'timegm' );
  2         3647  
  2         464  
352              
353             sub _as_bplist_fragment {
354 4     4   15 my($value) = scalar $_[0]->value;
355 4         8 my($posixval);
356              
357 4 100       23 if (looks_like_number($value)) {
    50          
358 1         2 $posixval = $value;
359             } elsif ($value =~ /\A(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d)\:(\d\d)\:(\d\d(?:\.\d+)?)Z\z/) {
360 3         16 $posixval = timegm($6, $5, $4, $3, $2 - 1, $1);
361             } else {
362 0         0 die "Invalid plist date '$value'\n";
363             }
364              
365             # Dates are simply stored as floating-point numbers (seconds since the
366             # start of the CoreFoundation epoch) with a different tag value.
367             # See the notes in Mac::PropertyList::real on float format.
368 4         116 return pack('Cd>', Mac::PropertyList::WriteBinary::tagDate + 3,
369             $posixval - 978307200);
370             }
371              
372             package Mac::PropertyList::real;
373              
374             # Here we're assuming that the 'd' format for pack produces
375             # an IEEE-754 double-precision (64-bit) floating point
376             # representation, because ... it does on practically every
377             # system. However, this will not be portable to systems which
378             # don't natively use IEEE-754 format!
379              
380             sub _as_bplist_fragment {
381 5     5   10 my($self) = shift;
382              
383 5         16 return pack('Cd>', Mac::PropertyList::WriteBinary::tagFloat + 3, $self->value);
384             }
385              
386             package Mac::PropertyList::integer;
387              
388 2     2   15 use constant tagInteger => 0x10;
  2         24  
  2         269  
389              
390             sub _as_bplist_fragment {
391 15     15   36 my($value) = $_[0]->value;
392              
393             # Per comments in CFBinaryPList.c, only 8-byte integers (and
394             # 16-byte integers, if they're supported, which they're not) are
395             # interpreted as signed. Shorter integers are always unsigned.
396             # Therefore all negative numbers must be written as 8-byte
397             # integers.
398              
399 15 100       29 if ($value < 0) {
400 2         3 if (Mac::PropertyList::WriteBinary::havePack64) {
401 2         9 return pack('Cq>', tagInteger + 3, $value);
402             } else {
403             return pack('CSSl>', tagInteger + 3, 65535, 65535, $value);
404             }
405             } else {
406 13         24 return Mac::PropertyList::WriteBinary::_pos_integer($value);
407             }
408             }
409              
410             package Mac::PropertyList::uid;
411              
412 2     2   21 use constant tagUID => Mac::PropertyList::WriteBinary->tagUID;
  2         4  
  2         613  
413              
414             sub _as_bplist_fragment {
415 4     4   12 my( $value ) = $_[0]->value;
416              
417             # TODO what about UIDs longer than 16 bytes? Or are there none?
418 4         21 return pack 'CH*', tagUID + length( $value ) / 2 - 1, $value;
419             }
420              
421             package Mac::PropertyList::string;
422              
423             sub _as_bplist_fragment {
424             # Returning a fragment of 'undef' indicates we've already assigned
425             # an object ID.
426 6     6   17 return ( undef, $_[1]->_assign_id($_[0]->value) );
427             }
428              
429             package Mac::PropertyList::ustring;
430              
431             sub _as_bplist_fragment {
432             # Returning a fragment of 'undef' indicates we've already assigned
433             # an object ID.
434 0     0   0 return ( undef, $_[1]->_assign_id($_[0]->value) );
435             }
436              
437             package Mac::PropertyList::data;
438              
439             sub _as_bplist_fragment {
440 3     3   9 my($value) = $_[0]->value;
441 3         8 return (&Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagData, length $value) .
442             $value);
443             }
444              
445             package Mac::PropertyList::true;
446              
447 2     2   5 sub _as_bplist_fragment { return "\x09"; }
448              
449             package Mac::PropertyList::false;
450              
451 1     1   2 sub _as_bplist_fragment { return "\x08"; }
452              
453             =head1 AUTHOR
454              
455             Wim Lewis, C<< >>
456              
457             Copyright © 2012-2021 Wim Lewis. All rights reserved.
458              
459             Tom Wyant added support for UID types.
460              
461             This program is free software; you can redistribute it and/or modify
462             it under the same terms as Perl itself.
463              
464             =head1 SEE ALSO
465              
466             L for the inverse operation.
467              
468             Apple's partial published CoreFoundation source code:
469             L
470              
471             =cut
472              
473             "One more thing";