File Coverage

blib/lib/Image/MetaData/JPEG/parsers/app13.pl
Criterion Covered Total %
statement 55 55 100.0
branch 20 26 76.9
condition n/a
subroutine 7 7 100.0
pod 0 3 0.0
total 82 91 90.1


line stmt bran cond sub pod time code
1             ###########################################################
2             # A Perl package for showing/modifying JPEG (meta)data. #
3             # Copyright (C) 2004,2005,2006 Stefano Bettelli #
4             # See the COPYING and LICENSE files for license terms. #
5             ###########################################################
6 15     15   69 use Image::MetaData::JPEG::data::Tables qw(:TagsAPP13);
  15         23  
  15         1895  
7 15     15   85 no integer;
  15         22  
  15         71  
8 15     15   267 use strict;
  15         23  
  15         353  
9 15     15   55 use warnings;
  15         21  
  15         7723  
10              
11             ###########################################################
12             # This method parses an APP13 segment, often used by pho- #
13             # to-manipulation programs to store IPTC (International #
14             # Press Telecommunications Council) tags, although this #
15             # isn't a formally defined standard (first adopted by #
16             # Adobe). The structure of an APP13 segment is as follows #
17             #---------------------------------------------------------#
18             # 14 bytes identifier, e.g. "Photoshop 3.0\000" #
19             # 8 bytes resolution (?, Photoshop 2.5 only) #
20             # ..... sequence of Photoshop Image Resource blocks #
21             #=========================================================#
22             # The sequence of resource blocks may require additional #
23             # APP13 markers, whose order is always to be preserved. #
24             # TODO: implement parsing of multiple blocks!!!! #
25             #=========================================================#
26             # Ref: "Adobe Photoshop 6.0: File Formats Specifications",#
27             # Adobe System Inc., ver.6.0, rel.2, November 2000. #
28             # and "\"Solo\" Image File Format. RichTIFF and its #
29             # replacement by \"Solo\" JFIF", version 2.0a, #
30             # Coatsworth Comm. Inc., Brampton, Ontario, Canada #
31             ###########################################################
32             sub parse_app13 {
33 30     30 0 52 my ($this) = @_;
34 30         42 my $offset = 0;
35             # they say that this segment always starts with a specific
36             # string from Adobe, namely "Photoshop 3.0\000". But some
37             # old pics, with only non-IPTC data, use other strings ...
38             # try all known possibilities and die if no match is found
39 30         72 for my $good_id (@$APP13_PHOTOSHOP_IDS) {
40 60 100       144 next if $this->size() < length $good_id;
41 56         150 my $id = $this->read_record($UNDEF, 0, length $good_id);
42 56 100       275 next unless $good_id eq $id;
43             # store the identifier (and some additional bytes for ver.2.5 only)
44 29         111 $this->store_record('Identifier', $ASCII, $offset, length $id);
45 29 100       138 $this->store_record('Resolution', $SHORT, $offset, 4) if $id =~ /2\.5/;
46             }
47             # Die if no identifier was found (show first ten characters)
48 30 100       106 $this->die('Wrong identifier ('.$this->read_record($UNDEF, 0, 10).')')
49             unless $this->search_record('Identifier');
50             # not much to do now, except calling repeatedly a method for
51             # parsing resource data blocks. The argument is the current
52             # offset, and the output is the new offset after the block
53 29         100 $offset = $this->parse_resource_data_block($offset)
54             while ($offset < $this->size());
55             # complain if we read a bit too much ...
56 29         128 $this->test_size($offset, "parsed after segment end");
57             }
58              
59             ###########################################################
60             # This method parses an APP13 resource data block (TODO: #
61             # blocks spanning multiple APP13s). Currently, it treates #
62             # in details IPTC (International Press Telecommunications #
63             # Council) blocks, and just saves the other tags (which #
64             # are, however, in general, much simpler). The only argu- #
65             # ment is the current offset in the data area of this #
66             # object. The output is the new offset after this block. #
67             # The structure of a resource data block is: #
68             #---------------------------------------------------------#
69             # 4 bytes type (Photoshop uses "8BIM" from v.6.0 on) #
70             # 2 bytes unique identifier (e.g. "\004\004" for IPTC) #
71             # 1 byte length of resource data block name #
72             # .... name (padded to make size even incl. length) #
73             # 4 bytes size of resource data (following data only) #
74             # .... data (padded to make size even) #
75             #---------------------------------------------------------#
76             # The content of each Photoshop non-IPTC data block is #
77             # transformed into a record and stored in a first-level #
78             # subdirectory, depending on its type. The block type is, #
79             # in fact, no more supposed to be '8BIM'; however, only #
80             # some known values are accepted. The IPTC data block is #
81             # instead analysed in detail, and all findings are stored #
82             # in another (sub)directory tree. Empty subdirectories #
83             # are not created. #
84             #=========================================================#
85             # Ref: "Adobe Photoshop 6.0: File Formats Specifications",#
86             # Adobe System Inc., ver.6.0, rel.2, November 2000. #
87             # and "\"Solo\" Image File Format. RichTIFF and its #
88             # replacement by \"Solo\" JFIF", version 2.0a, #
89             # Coatsworth Comm. Inc., Brampton, Ontario, Canada #
90             ###########################################################
91             sub parse_resource_data_block {
92 407     407 0 471 my ($this, $offset) = @_;
93             # An "Adobe Phostoshop" block usually starts with "8BIM".
94             # Accepted values are listed in @$APP13_PHOTOSHOP_TYPE.
95 407         780 my $type = $this->read_record($ASCII, $offset, 4);
96 1221         1777 $this->die("Wrong resource data block type ($type)")
97 407 50       989 unless grep { $_ eq $type } @$APP13_PHOTOSHOP_TYPE;
98             # then there is the block identifier
99 407         799 my $identifier = $this->read_record($SHORT, $offset);
100             # get the name length and the name. The length is the first byte.
101             # The name can be padded so that length+name span an even number
102             # of bytes. Usually the name is "" (the empty string, with length
103             # 0, not "\000", which has length 1) so we get "\000\000" here.
104 407         1242 my $name_length = $this->read_record($BYTE, $offset);
105 407         1258 my $name = $this->read_record($ASCII, $offset, $name_length);
106             # read the padding byte if length was even
107 407 50       1560 $this->read_record($UNDEF, $offset, 1) if ($name_length % 2) == 0;
108             # the next four bytes encode the resource data size. Also in this
109             # case the total size must be padded to an even number of bytes
110 407         1046 my $data_length = $this->read_record($LONG, $offset);
111 407 100       1110 my $need_padding = ($data_length % 2) ? 1 : 0;
112             # check that there is enough data for this block; obviously, this
113             # break the case of a resource data block spanning multiple segments!
114 407         908 $this->test_size($offset + $data_length + $need_padding,
115             "in IPTC resource data block");
116             # calculate the absolute end of the resource data block
117 407         423 my $boundary = $offset + $data_length;
118             # Currently, the IPTC block deserves a special treatment: repeatedly
119             # read data from the data block, up to an amount equal to $data_length.
120             # The IPTC-parsing routine, as usual, returns the new working offset at
121             # the end. The IPTC records are written in separate subdirectories. There
122             # should be no resource block description for IPTC, make it an error.
123 407 100       629 if ($identifier eq $APP13_PHOTOSHOP_IPTC) {
124 24 50       62 $this->die("Non-empty IPTC resource block descriptor") if $name ne '';
125 24         127 $offset=$this->parse_IPTC_dataset($offset) while ($offset<$boundary); }
126             # Less interesting tags are mistreated. However, they should not pollute
127             # the root dir, so a subdirectory is used, which depends on $type. $name
128             # is stored in the "extra" field for use at dump time.
129 383         604 else { my $dirname = $APP13_PHOTOSHOP_DIRNAME . '_' . $type;
130 383         716 my $dir = $this->provide_subdirectory($dirname);
131 383         794 $this->store_record($dir,$identifier,$UNDEF,$offset,$data_length);
132 383 50       655 $this->search_record('LAST_RECORD',$dir)->{extra} = $name if $name;}
133             # pad, if you need padding ...
134 407 100       619 ++$offset if $need_padding;
135             # that's it, return the working offset
136 407         1111 return $offset;
137             }
138              
139             ###########################################################
140             # This method parses one dataset from an APP13 IPTC block #
141             # and creates a corresponding record in the appropriate #
142             # subdirectory (which depends on the IPTC record number). #
143             # The $offset argument is a pointer in the segment data #
144             # area, which must be returned updated at the end of the #
145             # routine. An IPTC record is a sequence of datasets, #
146             # which need not be in numerical order, unless otherwise #
147             # specified. Each dataset consists of a unique tag and a #
148             # data field. A standard tag is used when the data field #
149             # size is less than 32768 bytes; otherwise, an extended #
150             # tag is used. The structure of a dataset is: #
151             #---------------------------------------------------------#
152             # 1 byte tag marker (must be 0x1c) #
153             # 1 byte record number (e.g., 2 for 2:xx datasets) #
154             # 1 byte dataset number #
155             # 2 bytes data length (< 32768 octets) or length of ... #
156             # <....> data length (> 32767 bytes only) #
157             # .... data (its length is specified before) #
158             #=========================================================#
159             # So, standard datasets have a 5 bytes tag; the last two #
160             # bytes in the tag contain the data field length, the msb #
161             # being always 0. For extended datasets instead, these #
162             # two bytes contain the length of the (following) data #
163             # field length, the msb being always 1. The value of the #
164             # msb thus distinguishes "standard" from "extended"; in #
165             # digital photographies, I assume that the datasets which #
166             # are actually used (a subset of the standard) are always #
167             # standard; therefore, we are likely not to have the IPTC #
168             # record not spanning more than one APP13 segment. #
169             #=========================================================#
170             # The record types defined by the IPTC-NAA standard and #
171             # the corresponding dataset ranges are: #
172             # #
173             # Object Envelop Record: 1:xx #
174             # Application Records: 2:xx through 6:xx #
175             # Pre-ObjectData Descriptor Record: 7:xx #
176             # ObjectData Record: 8:xx #
177             # Post-ObjectData Descriptor Record: 9:xx #
178             # #
179             # The Adobe "pseudo"-standard is usually restricted to #
180             # the first application record, so it is unlikely, but #
181             # not impossible, to find datasets outside of 2:xx. #
182             # Record numbers should only be found in increasing #
183             # order, but this rule is currently not enforced here. #
184             #=========================================================#
185             # Ref: "IPTC-NAA: Information Interchange Model Version 4"#
186             # Comité Internat. des Télécommunications de Presse. #
187             ###########################################################
188             sub parse_IPTC_dataset {
189 370     370 0 378 my ($this, $offset) = @_;
190             # check that there is enough data for the dataset header
191 370         780 $this->test_size($offset + 5, "in IPTC dataset");
192             # each record is a sequence of variable length data sets read the
193             # first four fields (five bytes), and store them in local variables.
194 370         720 my $marker = $this->read_record($BYTE , $offset);
195 370         1137 my $rnumber = $this->read_record($BYTE , $offset);
196 370         1086 my $dataset = $this->read_record($BYTE , $offset);
197 370         1099 my $length = $this->read_record($SHORT, $offset);
198             # check that the tag marker is 0x1c as specified by the IPTC standard
199 370 50       929 $this->die("Invalid IPTC tag marker ($marker)")
200             if $marker ne $APP13_IPTC_TAGMARKER;
201             # retrieve or create the correct subdirectory; this depends on
202             # the record number (most often, it is 2, for 2:xx datasets)
203 370         1089 my $dir = $this->provide_subdirectory("${APP13_IPTC_DIRNAME}_$rnumber");
204             # if $length has the msb set, then we are dealing with an
205             # extended dataset. In this case, abort and write more code
206 370 50       703 $this->die("IPTC extended datasets not yet supported")
207             if $length & (0x01 << 15);
208             # push a new record reference in the correct subdir. Use the
209             # dataset number as identifier, the rest is strightforward
210             # (assume that the data type is always ASCII).
211 370         762 $this->store_record($dir, $dataset, $ASCII, $offset, $length);
212             # return the update offset
213 370         1078 return $offset;
214             }
215              
216             # successful load
217             1;