File Coverage

blib/lib/Image/ExifTool/Import.pm
Criterion Covered Total %
statement 92 162 56.7
branch 52 134 38.8
condition 11 34 32.3
subroutine 4 6 66.6
pod 2 4 50.0
total 161 340 47.3


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: Import.pm
3             #
4             # Description: Import CSV and JSON database files
5             #
6             # Revisions: 2011-03-05 - P. Harvey Created
7             #------------------------------------------------------------------------------
8             package Image::ExifTool::Import;
9              
10 6     6   43 use strict;
  6         15  
  6         247  
11             require Exporter;
12              
13 6     6   40 use vars qw($VERSION @ISA @EXPORT_OK);
  6         14  
  6         13302  
14              
15             $VERSION = '1.10';
16             @ISA = qw(Exporter);
17             @EXPORT_OK = qw(ReadCSV ReadJSON);
18              
19             sub ReadJSONObject($;$);
20              
21             my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r", 'b' => "\b", 'f' => "\f" );
22             my $charset;
23              
24             #------------------------------------------------------------------------------
25             # Read CSV file
26             # Inputs: 0) CSV file name, file ref or RAF ref, 1) database hash ref,
27             # 2) missing tag value, 3) delimiter if other than ','
28             # Returns: undef on success, or error string
29             # Notes: There are various flavours of CSV, but here we assume that only
30             # double quotes are escaped, and they are escaped by doubling them
31             sub ReadCSV($$;$$)
32             {
33 0     0 1 0 local ($_, $/);
34 0         0 my ($file, $database, $missingValue, $delim) = @_;
35 0         0 my ($buff, @tags, $found, $err, $raf, $openedFile);
36              
37 0 0       0 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
38 0         0 $raf = $file;
39 0         0 $file = 'CSV file';
40             } elsif (ref $file eq 'GLOB') {
41 0         0 $raf = new File::RandomAccess($file);
42 0         0 $file = 'CSV file';
43             } else {
44 0 0       0 open CSVFILE, $file or return "Error opening CSV file '${file}'";
45 0         0 binmode CSVFILE;
46 0         0 $openedFile = 1;
47 0         0 $raf = new File::RandomAccess(\*CSVFILE);
48             }
49 0 0       0 $delim = ',' unless defined $delim;
50             # set input record separator by first newline found in the file
51             # (safe because first line should contain only tag names)
52 0         0 while ($raf->Read($buff, 65536)) {
53 0 0       0 $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
54             }
55 0         0 $raf->Seek(0,0);
56 0         0 while ($raf->ReadLine($buff)) {
57 0         0 my (@vals, $v, $i, %fileInfo);
58 0         0 my @toks = split /\Q$delim/, $buff;
59 0         0 while (@toks) {
60 0         0 ($v = shift @toks) =~ s/^ +//; # remove leading spaces
61 0 0       0 if ($v =~ s/^"//) {
62             # quoted value must end in an odd number of quotes
63 0   0     0 while ($v !~ /("+)\s*$/ or not length($1) & 1) {
64 0 0       0 if (@toks) {
65 0         0 $v .= $delim . shift @toks;
66             } else {
67             # read another line from the file
68 0 0       0 $raf->ReadLine($buff) or last;
69 0         0 @toks = split /\Q$delim/, $buff;
70 0 0       0 last unless @toks;
71 0         0 $v .= shift @toks;
72             }
73             }
74 0         0 $v =~ s/"\s*$//; # remove trailing quote and whitespace
75 0         0 $v =~ s/""/"/g; # un-escape quotes
76             } else {
77 0         0 $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
78             }
79 0         0 push @vals, $v;
80             }
81 0 0       0 if (@tags) {
82             # save values for each tag
83 0   0     0 for ($i=0; $i<@vals and $i<@tags; ++$i) {
84             # ignore empty entries unless missingValue is empty too
85 0 0 0     0 next unless length $vals[$i] or defined $missingValue and $missingValue eq '';
      0        
86             # delete tag (set value to undef) if value is same as missing tag
87 0 0 0     0 $fileInfo{$tags[$i]} =
88             (defined $missingValue and $vals[$i] eq $missingValue) ? undef : $vals[$i];
89             }
90             # figure out the file name to use
91 0 0       0 if ($fileInfo{SourceFile}) {
92 0         0 $$database{$fileInfo{SourceFile}} = \%fileInfo;
93 0         0 $found = 1;
94             }
95             } else {
96             # the first row should be the tag names
97 0         0 foreach (@vals) {
98             # terminate at first blank tag name (eg. extra comma at end of line)
99 0 0       0 last unless length $_;
100 0 0       0 @tags or s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
101 0 0       0 /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '${_}'", last;
102 0         0 push(@tags, $_);
103             }
104 0 0       0 last if $err;
105 0 0       0 @tags or $err = 'No tags found', last;
106             # fix "SourceFile" case if necessary
107 0 0       0 $tags[0] = 'SourceFile' if lc $tags[0] eq 'sourcefile';
108             }
109             }
110 0 0       0 close CSVFILE if $openedFile;
111 0         0 undef $raf;
112 0 0 0     0 $err = 'No SourceFile column' unless $found or $err;
113 0 0       0 return $err ? "$err in $file" : undef;
114             }
115              
116             #------------------------------------------------------------------------------
117             # Convert unicode code point to UTF-8
118             # Inputs: 0) integer Unicode character
119             # Returns: UTF-8 bytes
120             sub ToUTF8($)
121             {
122 0     0 0 0 require Image::ExifTool::Charset;
123 0         0 return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
124             }
125              
126             #------------------------------------------------------------------------------
127             # Read JSON object from file
128             # Inputs: 0) RAF reference or undef, 1) optional scalar reference for data
129             # to read before reading from file (ie. the file read buffer)
130             # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
131             # empty object or array (and sets $$buffPt to empty string on EOF)
132             # Notes: position in buffer is significant
133             sub ReadJSONObject($;$)
134             {
135 501     501 0 864 my ($raf, $buffPt) = @_;
136             # initialize buffer if necessary
137 501         711 my ($pos, $readMore, $rtnVal, $tok, $key, $didBOM);
138 501 100       856 if ($buffPt) {
139 443         640 $pos = pos $$buffPt;
140 443 50       783 $pos = pos($$buffPt) = 0 unless defined $pos;
141             } else {
142 58         94 my $buff = '';
143 58         94 $buffPt = \$buff;
144 58         126 $pos = 0;
145             }
146 501         653 Tok: for (;;) {
147             # (didn't spend the time to understand how $pos could be undef, but
148             # put a test here to be safe because one user reported this problem)
149 501 50       857 last unless defined $pos;
150 501 100 66     1460 if ($pos >= length $$buffPt or $readMore) {
151 58 50       156 last unless defined $raf;
152             # read another 64kB and add to unparsed data
153 58         118 my $offset = length($$buffPt) - $pos;
154 58 50       141 if ($offset) {
155 0         0 my $buff;
156 0 0       0 $raf->Read($buff, 65536) or $$buffPt = '', last;
157 0         0 $$buffPt = substr($$buffPt, $pos) . $buff;
158             } else {
159 58 50       207 $raf->Read($$buffPt, 65536) or $$buffPt = '', last;
160             }
161 58 50       169 unless ($didBOM) {
162 58         129 $$buffPt =~ s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
163 58         98 $didBOM = 1;
164             }
165 58         274 $pos = pos($$buffPt) = 0;
166 58         155 $readMore = 0;
167             }
168 501 50       864 unless ($tok) {
169             # skip white space and find next character
170 501 50       1360 $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
171 501         934 $tok = $1;
172 501         733 $pos = pos $$buffPt;
173             }
174             # see what type of object this is
175 501 100 100     1432 if ($tok eq '{') { # object (hash)
    100 66        
    100          
    100          
176 104 50       286 $rtnVal = { } unless defined $rtnVal;
177 104         168 for (;;) {
178             # read "KEY":"VALUE" pairs
179 205 50       431 unless (defined $key) {
180 205         468 $key = ReadJSONObject($raf, $buffPt);
181 205         343 $pos = pos $$buffPt;
182             }
183             # ($key may be undef for empty JSON object)
184 205 100       445 if (defined $key) {
185             # scan to delimiting ':'
186 202 50       544 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
187 202 50       530 $1 eq ':' or return undef; # error if not a colon
188 202         377 my $val = ReadJSONObject($raf, $buffPt);
189 202         325 $pos = pos $$buffPt;
190 202 50       416 return undef unless defined $val;
191 202         528 $$rtnVal{$key} = $val;
192 202         365 undef $key;
193             }
194             # scan to delimiting ',' or bounding '}'
195 205 50       620 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
196 205 100       510 last if $1 eq '}'; # check for end of object
197 101 50       189 $1 eq ',' or return undef; # error if not a comma
198             }
199             } elsif ($tok eq '[') { # array
200 17 50       41 $rtnVal = [ ] unless defined $rtnVal;
201 17         28 for (;;) {
202 33         63 my $item = ReadJSONObject($raf, $buffPt);
203 33         52 $pos = pos $$buffPt;
204             # ($item may be undef for empty array)
205 33 100       78 push @$rtnVal, $item if defined $item;
206             # scan to delimiting ',' or bounding ']'
207 33 50       86 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
208 33 100       80 last if $1 eq ']'; # check for end of array
209 16 50       62 $1 eq ',' or return undef; # error if not a comma
210             }
211             } elsif ($tok eq '"') { # quoted string
212 289         394 for (;;) {
213 289 50       1034 $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
214 289 50       723 last unless length($1) & 1; # check for escaped quote
215             }
216 289         702 $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
217             # unescape characters
218 289         544 $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
  0         0  
219 289 0       448 $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
  0         0  
220             # decode base64 (binary data) values
221 289 50 33     662 if ($rtnVal =~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and length($rtnVal) % 4 == 3) {
222 0         0 require Image::ExifTool::XMP;
223 0         0 $rtnVal = ${Image::ExifTool::XMP::DecodeBase64(substr($rtnVal,7))};
  0         0  
224             }
225             } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
226             # return undef for empty object, array, or list item
227             # (empty list item actually not valid JSON)
228 8         20 pos($$buffPt) = pos($$buffPt) - 1;
229             } else { # number, 'true', 'false', 'null'
230 83 50       222 $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
231 83         194 pos($$buffPt) = pos($$buffPt) - 1;
232 83         202 $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
233             }
234 501         691 last;
235             }
236 501         1017 return $rtnVal;
237             }
238              
239             #------------------------------------------------------------------------------
240             # Read JSON file
241             # Inputs: 0) JSON file name, file ref or RAF ref, 1) database hash ref,
242             # 2) flag to delete "-" tags, 3) character set
243             # Returns: undef on success, or error string
244             sub ReadJSON($$;$$)
245             {
246 58     58 1 102 local $_;
247 58         152 my ($file, $database, $missingValue, $chset) = @_;
248 58         104 my ($raf, $openedFile);
249              
250             # initialize character set for converting "\uHHHH" chars
251 58   50     168 $charset = $chset || 'UTF8';
252 58 50       279 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
253 58         97 $raf = $file;
254 58         124 $file = 'JSON file';
255             } elsif (ref $file eq 'GLOB') {
256 0         0 $raf = new File::RandomAccess($file);
257 0         0 $file = 'JSON file';
258             } else {
259 0 0       0 open JSONFILE, $file or return "Error opening JSON file '${file}'";
260 0         0 binmode JSONFILE;
261 0         0 $openedFile = 1;
262 0         0 $raf = new File::RandomAccess(\*JSONFILE);
263             }
264 58         176 my $obj = ReadJSONObject($raf);
265 58 50       148 close JSONFILE if $openedFile;
266 58 50       177 unless (ref $obj eq 'ARRAY') {
267 58 50       153 ref $obj eq 'HASH' or return "Format error in JSON file '${file}'";
268 58         135 $obj = [ $obj ];
269             }
270 58         125 my ($info, $found);
271 58         152 foreach $info (@$obj) {
272 58 50       172 next unless ref $info eq 'HASH';
273             # fix "SourceFile" case, or assume '*' if SourceFile not specified
274 58 50       172 unless (defined $$info{SourceFile}) {
275 58         344 my ($key) = grep /^SourceFile$/i, keys %$info;
276 58 50       167 if ($key) {
277 0         0 $$info{SourceFile} = $$info{$key};
278 0         0 delete $$info{$key};
279             } else {
280 58         160 $$info{SourceFile} = '*';
281             }
282             }
283 58 100       162 if (defined $missingValue) {
284 1   100     16 $$info{$_} eq $missingValue and $$info{$_} = undef foreach keys %$info;
285             }
286 58         164 $$database{$$info{SourceFile}} = $info;
287 58         135 $found = 1;
288             }
289 58 50       281 return $found ? undef : "No valid JSON objects in '${file}'";
290             }
291              
292              
293             1; # end
294              
295             __END__