File Coverage

blib/lib/Image/ExifTool/DjVu.pm
Criterion Covered Total %
statement 68 76 89.4
branch 31 50 62.0
condition 7 18 38.8
subroutine 7 7 100.0
pod 0 4 0.0
total 113 155 72.9


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: DjVu.pm
3             #
4             # Description: Read DjVu archive meta information
5             #
6             # Revisions: 09/25/2008 - P. Harvey Created
7             #
8             # References: 1) http://djvu.sourceforge.net/ (DjVu v3 specification, Nov 2005)
9             # 2) http://www.djvu.org/
10             #
11             # Notes: DjVu files are recognized and the IFF structure is processed
12             # by Image::ExifTool::AIFF
13             #------------------------------------------------------------------------------
14              
15             package Image::ExifTool::DjVu;
16              
17 1     1   4511 use strict;
  1         2  
  1         38  
18 1     1   5 use vars qw($VERSION);
  1         2  
  1         42  
19 1     1   6 use Image::ExifTool qw(:DataAccess :Utils);
  1         2  
  1         1607  
20              
21             $VERSION = '1.07';
22              
23             sub ParseAnt($);
24             sub ProcessAnt($$$);
25             sub ProcessMeta($$$);
26             sub ProcessBZZ($$$);
27              
28             # DjVu chunks that we parse (ref 4)
29             %Image::ExifTool::DjVu::Main = (
30             GROUPS => { 2 => 'Image' },
31             NOTES => q{
32             Information is extracted from the following chunks in DjVu images. See
33             L for the DjVu specification.
34             },
35             INFO => {
36             SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Info' },
37             },
38             FORM => {
39             TypeOnly => 1, # extract chunk type only, then descend into chunk
40             SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Form' },
41             },
42             ANTa => {
43             SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Ant' },
44             },
45             ANTz => {
46             Name => 'CompressedAnnotation',
47             SubDirectory => {
48             TagTable => 'Image::ExifTool::DjVu::Ant',
49             ProcessProc => \&ProcessBZZ,
50             }
51             },
52             INCL => 'IncludedFileID',
53             );
54              
55             # information in the DjVu INFO chunk
56             %Image::ExifTool::DjVu::Info = (
57             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
58             GROUPS => { 2 => 'Image' },
59             FORMAT => 'int8u',
60             PRIORITY => 0, # first INFO block takes priority
61             0 => {
62             Name => 'ImageWidth',
63             Format => 'int16u',
64             },
65             2 => {
66             Name => 'ImageHeight',
67             Format => 'int16u',
68             },
69             4 => {
70             Name => 'DjVuVersion',
71             Description => 'DjVu Version',
72             Format => 'int8u[2]',
73             # (this may be just one byte as with version 0.16)
74             ValueConv => '$val=~/(\d+) (\d+)/ ? "$2.$1" : "0.$val"',
75             },
76             6 => {
77             Name => 'SpatialResolution',
78             Format => 'int16u',
79             ValueConv => '(($val & 0xff)<<8) + ($val>>8)', # (little-endian!)
80             },
81             8 => {
82             Name => 'Gamma',
83             ValueConv => '$val / 10',
84             },
85             9 => {
86             Name => 'Orientation',
87             Mask => 0x07, # (upper 5 bits reserved)
88             PrintConv => {
89             1 => 'Horizontal (normal)',
90             2 => 'Rotate 180',
91             5 => 'Rotate 90 CW',
92             6 => 'Rotate 270 CW',
93             },
94             },
95             );
96              
97             # information in the DjVu FORM chunk
98             %Image::ExifTool::DjVu::Form = (
99             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
100             GROUPS => { 2 => 'Image' },
101             0 => {
102             Name => 'SubfileType',
103             Format => 'undef[4]',
104             Priority => 0,
105             PrintConv => {
106             DJVU => 'Single-page image',
107             DJVM => 'Multi-page document',
108             PM44 => 'Color IW44',
109             BM44 => 'Grayscale IW44',
110             DJVI => 'Shared component',
111             THUM => 'Thumbnail image',
112             },
113             },
114             );
115              
116             # tags found in the DjVu annotation chunk (ANTz or ANTa)
117             %Image::ExifTool::DjVu::Ant = (
118             PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessAnt,
119             GROUPS => { 2 => 'Image' },
120             NOTES => 'Information extracted from annotation chunks.',
121             # Note: For speed, ProcessAnt() pre-scans for known tag ID's, so if any
122             # new tags are added here they must also be added to the pre-scan check
123             metadata => {
124             SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Meta' }
125             },
126             xmp => {
127             Name => 'XMP',
128             SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' }
129             },
130             );
131              
132             # tags found in the DjVu annotation metadata
133             %Image::ExifTool::DjVu::Meta = (
134             PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessMeta,
135             GROUPS => { 1 => 'DjVu-Meta', 2 => 'Image' },
136             NOTES => q{
137             This table lists the standard DjVu metadata tags, but ExifTool will extract
138             any tags that exist even if they don't appear here. The DjVu v3
139             documentation endorses tags borrowed from two standards: 1) BibTeX
140             bibliography system tags (all lowercase Tag ID's in the table below), and 2)
141             PDF DocInfo tags (capitalized Tag ID's).
142             },
143             # BibTeX tags (ref http://en.wikipedia.org/wiki/BibTeX)
144             address => { Groups => { 2 => 'Location' } },
145             annote => { Name => 'Annotation' },
146             author => { Groups => { 2 => 'Author' } },
147             booktitle => { Name => 'BookTitle' },
148             chapter => { },
149             crossref => { Name => 'CrossRef' },
150             edition => { },
151             eprint => { Name => 'EPrint' },
152             howpublished=> { Name => 'HowPublished' },
153             institution => { },
154             journal => { },
155             key => { },
156             month => { Groups => { 2 => 'Time' } },
157             note => { },
158             number => { },
159             organization=> { },
160             pages => { },
161             publisher => { },
162             school => { },
163             series => { },
164             title => { },
165             type => { },
166             url => { Name => 'URL' },
167             volume => { },
168             year => { Groups => { 2 => 'Time' } },
169             # PDF tags (same as Image::ExifTool::PDF::Info)
170             Title => { },
171             Author => { Groups => { 2 => 'Author' } },
172             Subject => { },
173             Keywords => { },
174             Creator => { },
175             Producer => { },
176             CreationDate => {
177             Name => 'CreateDate',
178             Groups => { 2 => 'Time' },
179             # RFC 3339 date/time format
180             ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
181             PrintConv => '$self->ConvertDateTime($val)',
182             },
183             ModDate => {
184             Name => 'ModifyDate',
185             Groups => { 2 => 'Time' },
186             ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
187             PrintConv => '$self->ConvertDateTime($val)',
188             },
189             Trapped => {
190             # remove leading '/' from '/True' or '/False'
191             ValueConv => '$val=~s{^/}{}; $val',
192             },
193             );
194              
195             #------------------------------------------------------------------------------
196             # Parse DjVu annotation "s-expression" syntax (recursively)
197             # Inputs: 0) data ref (with pos($$dataPt) set to start of annotation)
198             # Returns: reference to list of tokens/references, or undef if no tokens,
199             # and the position in $$dataPt is set to end of last token
200             # Notes: The DjVu annotation syntax is not well documented, so I make
201             # a number of assumptions here!
202             sub ParseAnt($)
203             {
204 15     15 0 20 my $dataPt = shift;
205 15         22 my (@toks, $tok, $more);
206             # (the DjVu annotation syntax really sucks, and requires that every
207             # single token be parsed in order to properly scan through the items)
208 15         19 Tok: for (;;) {
209             # find the next token
210 56 100       147 last unless $$dataPt =~ /(\S)/sg; # get next non-space character
211 55 100       158 if ($1 eq '(') { # start of list
    100          
    100          
212 14         32 $tok = ParseAnt($dataPt);
213             } elsif ($1 eq ')') { # end of list
214 14         17 $more = 1;
215 14         23 last;
216             } elsif ($1 eq '"') { # quoted string
217 13         19 $tok = '';
218 13         14 for (;;) {
219             # get string up to the next quotation mark
220             # this doesn't work in perl 5.6.2! grrrr
221             # last Tok unless $$dataPt =~ /(.*?)"/sg;
222             # $tok .= $1;
223 16         24 my $pos = pos($$dataPt);
224 16 50       37 last Tok unless $$dataPt =~ /"/sg;
225 16         47 $tok .= substr($$dataPt, $pos, pos($$dataPt)-1-$pos);
226             # we're good unless quote was escaped by odd number of backslashes
227 16 100 66     67 last unless $tok =~ /(\\+)$/ and length($1) & 0x01;
228 3         7 $tok .= '"'; # quote is part of the string
229             }
230             # convert C escape sequences, allowed in quoted text
231             # (note: this only converts a few of them!)
232 13         52 my %esc = ( a => "\a", b => "\b", f => "\f", n => "\n",
233             r => "\r", t => "\t", '"' => '"', '\\' => '\\' );
234 13 50       40 $tok =~ s/\\(.)/$esc{$1}||'\\'.$1/egs;
  57         180  
235             } else { # key name
236 14         30 pos($$dataPt) = pos($$dataPt) - 1;
237             # allow anything in key but whitespace, braces and double quotes
238             # (this is one of those assumptions I mentioned)
239 14 50       58 $tok = $$dataPt =~ /([^\s()"]+)/sg ? $1 : undef;
240             }
241 41 50       121 push @toks, $tok if defined $tok;
242             }
243             # prevent further parsing unless more after this
244 15 100       27 pos($$dataPt) = length $$dataPt unless $more;
245 15 50       37 return @toks ? \@toks : undef;
246             }
247              
248             #------------------------------------------------------------------------------
249             # Process DjVu annotation chunk (ANTa or decoded ANTz)
250             # Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
251             # Returns: 1 on success
252             sub ProcessAnt($$$)
253             {
254 1     1 0 4 my ($et, $dirInfo, $tagTablePtr) = @_;
255 1         3 my $dataPt = $$dirInfo{DataPt};
256              
257             # quick pre-scan to check for metadata or XMP
258 1 50       24 return 1 unless $$dataPt =~ /\(\s*(metadata|xmp)[\s("]/s;
259              
260             # parse annotations into a tree structure
261 1         7 pos($$dataPt) = 0;
262 1 50       7 my $toks = ParseAnt($dataPt) or return 0;
263              
264             # process annotations individually
265 1         3 my $ant;
266 1         3 foreach $ant (@$toks) {
267 2 50 33     13 next unless ref $ant eq 'ARRAY' and @$ant >= 2;
268 2         6 my $tag = shift @$ant;
269 2 50 33     11 next if ref $tag or not defined $$tagTablePtr{$tag};
270 2 100       8 if ($tag eq 'metadata') {
271             # ProcessMeta() takes array reference
272 1         13 $et->HandleTag($tagTablePtr, $tag, $ant);
273             } else {
274 1 50       4 next if ref $$ant[0]; # only process simple values
275 1         6 $et->HandleTag($tagTablePtr, $tag, $$ant[0]);
276             }
277             }
278 1         12 return 1;
279             }
280              
281             #------------------------------------------------------------------------------
282             # Process DjVu metadata
283             # Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
284             # Returns: 1 on success
285             # Notes: input dirInfo DataPt is a reference to a list of pre-parsed metadata entries
286             sub ProcessMeta($$$)
287             {
288 1     1 0 3 my ($et, $dirInfo, $tagTablePtr) = @_;
289 1         3 my $dataPt = $$dirInfo{DataPt};
290 1 50       5 return 0 unless ref $$dataPt eq 'ARRAY';
291 1         8 $et->VerboseDir('Metadata', scalar @$$dataPt);
292 1         2 my ($item, $err);
293 1         4 foreach $item (@$$dataPt) {
294             # make sure item is a simple tag/value pair
295 12 50 33     70 $err=1, next unless ref $item eq 'ARRAY' and @$item >= 2 and
      33        
      33        
296             not ref $$item[0] and not ref $$item[1];
297             # add any new tags to the table
298 12 50       28 unless ($$tagTablePtr{$$item[0]}) {
299 0         0 my $name = $$item[0];
300 0         0 $name =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters
301 0 0       0 length $name or $err = 1, next;
302 0         0 AddTagToTable($tagTablePtr, $$item[0], { Name => ucfirst($name) });
303             }
304 12         34 $et->HandleTag($tagTablePtr, $$item[0], $$item[1]);
305             }
306 1 50       6 $err and $et->Warn('Ignored invalid metadata entry(s)');
307 1         4 return 1;
308             }
309              
310             #------------------------------------------------------------------------------
311             # Process BZZ-compressed data (in DjVu images)
312             # Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
313             # Returns: 1 on success
314             sub ProcessBZZ($$$)
315             {
316 1     1 0 4 my ($et, $dirInfo, $tagTablePtr) = @_;
317 1         565 require Image::ExifTool::BZZ;
318 1         7 my $buff = Image::ExifTool::BZZ::Decode($$dirInfo{DataPt});
319 1 50       8 unless (defined $buff) {
320 0         0 $et->Warn("Error decoding $$dirInfo{DirName}");
321 0         0 return 0;
322             }
323 1         15 my $verbose = $et->Options('Verbose');
324 1 50       6 if ($verbose >= 3) {
325             # dump the decoded data in very verbose mode
326 0         0 $et->VerboseDir("Decoded $$dirInfo{DirName}", 0, length $buff);
327 0         0 $et->VerboseDump(\$buff);
328             }
329 1         5 $$dirInfo{DataPt} = \$buff;
330 1         7 $$dirInfo{DataLen} = $$dirInfo{DirLen} = length $buff;
331             # process the data using the default process proc for this table
332 1 50       8 my $processProc = $$tagTablePtr{PROCESS_PROC} or return 0;
333 1         6 return &$processProc($et, $dirInfo, $tagTablePtr);
334             }
335              
336             1; # end
337              
338             __END__