File Coverage

blib/lib/Image/ExifTool/Import.pm
Criterion Covered Total %
statement 92 162 56.7
branch 52 134 38.8
condition 11 34 32.3
subroutine 4 6 66.6
pod 2 4 50.0
total 161 340 47.3


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: Import.pm
3             #
4             # Description: Import CSV and JSON database files
5             #
6             # Revisions: 2011-03-05 - P. Harvey Created
7             #------------------------------------------------------------------------------
8             package Image::ExifTool::Import;
9              
10 6     6   44 use strict;
  6         15  
  6         236  
11             require Exporter;
12              
13 6     6   34 use vars qw($VERSION @ISA @EXPORT_OK);
  6         14  
  6         13540  
14              
15             $VERSION = '1.10';
16             @ISA = qw(Exporter);
17             @EXPORT_OK = qw(ReadCSV ReadJSON);
18              
19             sub ReadJSONObject($;$);
20              
21             my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r", 'b' => "\b", 'f' => "\f" );
22             my $charset;
23              
24             #------------------------------------------------------------------------------
25             # Read CSV file
26             # Inputs: 0) CSV file name, file ref or RAF ref, 1) database hash ref,
27             # 2) missing tag value, 3) delimiter if other than ','
28             # Returns: undef on success, or error string
29             # Notes: There are various flavours of CSV, but here we assume that only
30             # double quotes are escaped, and they are escaped by doubling them
31             sub ReadCSV($$;$$)
32             {
33 0     0 1 0 local ($_, $/);
34 0         0 my ($file, $database, $missingValue, $delim) = @_;
35 0         0 my ($buff, @tags, $found, $err, $raf, $openedFile);
36              
37 0 0       0 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
38 0         0 $raf = $file;
39 0         0 $file = 'CSV file';
40             } elsif (ref $file eq 'GLOB') {
41 0         0 $raf = new File::RandomAccess($file);
42 0         0 $file = 'CSV file';
43             } else {
44 0 0       0 open CSVFILE, $file or return "Error opening CSV file '${file}'";
45 0         0 binmode CSVFILE;
46 0         0 $openedFile = 1;
47 0         0 $raf = new File::RandomAccess(\*CSVFILE);
48             }
49 0 0       0 $delim = ',' unless defined $delim;
50             # set input record separator by first newline found in the file
51             # (safe because first line should contain only tag names)
52 0         0 while ($raf->Read($buff, 65536)) {
53 0 0       0 $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
54             }
55 0         0 $raf->Seek(0,0);
56 0         0 while ($raf->ReadLine($buff)) {
57 0         0 my (@vals, $v, $i, %fileInfo);
58 0         0 my @toks = split /\Q$delim/, $buff;
59 0         0 while (@toks) {
60 0         0 ($v = shift @toks) =~ s/^ +//; # remove leading spaces
61 0 0       0 if ($v =~ s/^"//) {
62             # quoted value must end in an odd number of quotes
63 0   0     0 while ($v !~ /("+)\s*$/ or not length($1) & 1) {
64 0 0       0 if (@toks) {
65 0         0 $v .= $delim . shift @toks;
66             } else {
67             # read another line from the file
68 0 0       0 $raf->ReadLine($buff) or last;
69 0         0 @toks = split /\Q$delim/, $buff;
70 0 0       0 last unless @toks;
71 0         0 $v .= shift @toks;
72             }
73             }
74 0         0 $v =~ s/"\s*$//; # remove trailing quote and whitespace
75 0         0 $v =~ s/""/"/g; # un-escape quotes
76             } else {
77 0         0 $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
78             }
79 0         0 push @vals, $v;
80             }
81 0 0       0 if (@tags) {
82             # save values for each tag
83 0   0     0 for ($i=0; $i<@vals and $i<@tags; ++$i) {
84             # ignore empty entries unless missingValue is empty too
85 0 0 0     0 next unless length $vals[$i] or defined $missingValue and $missingValue eq '';
      0        
86             # delete tag (set value to undef) if value is same as missing tag
87 0 0 0     0 $fileInfo{$tags[$i]} =
88             (defined $missingValue and $vals[$i] eq $missingValue) ? undef : $vals[$i];
89             }
90             # figure out the file name to use
91 0 0       0 if ($fileInfo{SourceFile}) {
92 0         0 $$database{$fileInfo{SourceFile}} = \%fileInfo;
93 0         0 $found = 1;
94             }
95             } else {
96             # the first row should be the tag names
97 0         0 foreach (@vals) {
98             # terminate at first blank tag name (eg. extra comma at end of line)
99 0 0       0 last unless length $_;
100 0 0       0 @tags or s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
101 0 0       0 /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '${_}'", last;
102 0         0 push(@tags, $_);
103             }
104 0 0       0 last if $err;
105 0 0       0 @tags or $err = 'No tags found', last;
106             # fix "SourceFile" case if necessary
107 0 0       0 $tags[0] = 'SourceFile' if lc $tags[0] eq 'sourcefile';
108             }
109             }
110 0 0       0 close CSVFILE if $openedFile;
111 0         0 undef $raf;
112 0 0 0     0 $err = 'No SourceFile column' unless $found or $err;
113 0 0       0 return $err ? "$err in $file" : undef;
114             }
115              
116             #------------------------------------------------------------------------------
117             # Convert unicode code point to UTF-8
118             # Inputs: 0) integer Unicode character
119             # Returns: UTF-8 bytes
120             sub ToUTF8($)
121             {
122 0     0 0 0 require Image::ExifTool::Charset;
123 0         0 return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
124             }
125              
126             #------------------------------------------------------------------------------
127             # Read JSON object from file
128             # Inputs: 0) RAF reference or undef, 1) optional scalar reference for data
129             # to read before reading from file (ie. the file read buffer)
130             # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
131             # empty object or array (and sets $$buffPt to empty string on EOF)
132             # Notes: position in buffer is significant
133             sub ReadJSONObject($;$)
134             {
135 501     501 0 881 my ($raf, $buffPt) = @_;
136             # initialize buffer if necessary
137 501         745 my ($pos, $readMore, $rtnVal, $tok, $key, $didBOM);
138 501 100       835 if ($buffPt) {
139 443         643 $pos = pos $$buffPt;
140 443 50       883 $pos = pos($$buffPt) = 0 unless defined $pos;
141             } else {
142 58         103 my $buff = '';
143 58         122 $buffPt = \$buff;
144 58         128 $pos = 0;
145             }
146 501         668 Tok: for (;;) {
147             # (didn't spend the time to understand how $pos could be undef, but
148             # put a test here to be safe because one user reported this problem)
149 501 50       930 last unless defined $pos;
150 501 100 66     1603 if ($pos >= length $$buffPt or $readMore) {
151 58 50       180 last unless defined $raf;
152             # read another 64kB and add to unparsed data
153 58         128 my $offset = length($$buffPt) - $pos;
154 58 50       173 if ($offset) {
155 0         0 my $buff;
156 0 0       0 $raf->Read($buff, 65536) or $$buffPt = '', last;
157 0         0 $$buffPt = substr($$buffPt, $pos) . $buff;
158             } else {
159 58 50       225 $raf->Read($$buffPt, 65536) or $$buffPt = '', last;
160             }
161 58 50       200 unless ($didBOM) {
162 58         157 $$buffPt =~ s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
163 58         102 $didBOM = 1;
164             }
165 58         228 $pos = pos($$buffPt) = 0;
166 58         137 $readMore = 0;
167             }
168 501 50       952 unless ($tok) {
169             # skip white space and find next character
170 501 50       1344 $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
171 501         951 $tok = $1;
172 501         786 $pos = pos $$buffPt;
173             }
174             # see what type of object this is
175 501 100 100     1489 if ($tok eq '{') { # object (hash)
    100 66        
    100          
    100          
176 104 50       303 $rtnVal = { } unless defined $rtnVal;
177 104         184 for (;;) {
178             # read "KEY":"VALUE" pairs
179 205 50       411 unless (defined $key) {
180 205         513 $key = ReadJSONObject($raf, $buffPt);
181 205         376 $pos = pos $$buffPt;
182             }
183             # ($key may be undef for empty JSON object)
184 205 100       435 if (defined $key) {
185             # scan to delimiting ':'
186 202 50       549 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
187 202 50       513 $1 eq ':' or return undef; # error if not a colon
188 202         405 my $val = ReadJSONObject($raf, $buffPt);
189 202         345 $pos = pos $$buffPt;
190 202 50       446 return undef unless defined $val;
191 202         518 $$rtnVal{$key} = $val;
192 202         396 undef $key;
193             }
194             # scan to delimiting ',' or bounding '}'
195 205 50       639 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
196 205 100       517 last if $1 eq '}'; # check for end of object
197 101 50       187 $1 eq ',' or return undef; # error if not a comma
198             }
199             } elsif ($tok eq '[') { # array
200 17 50       47 $rtnVal = [ ] unless defined $rtnVal;
201 17         28 for (;;) {
202 33         81 my $item = ReadJSONObject($raf, $buffPt);
203 33         52 $pos = pos $$buffPt;
204             # ($item may be undef for empty array)
205 33 100       79 push @$rtnVal, $item if defined $item;
206             # scan to delimiting ',' or bounding ']'
207 33 50       92 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
208 33 100       77 last if $1 eq ']'; # check for end of array
209 16 50       34 $1 eq ',' or return undef; # error if not a comma
210             }
211             } elsif ($tok eq '"') { # quoted string
212 289         405 for (;;) {
213 289 50       1071 $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
214 289 50       854 last unless length($1) & 1; # check for escaped quote
215             }
216 289         736 $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
217             # unescape characters
218 289         529 $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
  0         0  
219 289 0       435 $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
  0         0  
220             # decode base64 (binary data) values
221 289 50 33     693 if ($rtnVal =~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and length($rtnVal) % 4 == 3) {
222 0         0 require Image::ExifTool::XMP;
223 0         0 $rtnVal = ${Image::ExifTool::XMP::DecodeBase64(substr($rtnVal,7))};
  0         0  
224             }
225             } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
226             # return undef for empty object, array, or list item
227             # (empty list item actually not valid JSON)
228 8         25 pos($$buffPt) = pos($$buffPt) - 1;
229             } else { # number, 'true', 'false', 'null'
230 83 50       203 $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
231 83         169 pos($$buffPt) = pos($$buffPt) - 1;
232 83         209 $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
233             }
234 501         673 last;
235             }
236 501         1055 return $rtnVal;
237             }
238              
239             #------------------------------------------------------------------------------
240             # Read JSON file
241             # Inputs: 0) JSON file name, file ref or RAF ref, 1) database hash ref,
242             # 2) flag to delete "-" tags, 3) character set
243             # Returns: undef on success, or error string
244             sub ReadJSON($$;$$)
245             {
246 58     58 1 101 local $_;
247 58         155 my ($file, $database, $missingValue, $chset) = @_;
248 58         121 my ($raf, $openedFile);
249              
250             # initialize character set for converting "\uHHHH" chars
251 58   50     157 $charset = $chset || 'UTF8';
252 58 50       270 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
253 58         99 $raf = $file;
254 58         111 $file = 'JSON file';
255             } elsif (ref $file eq 'GLOB') {
256 0         0 $raf = new File::RandomAccess($file);
257 0         0 $file = 'JSON file';
258             } else {
259 0 0       0 open JSONFILE, $file or return "Error opening JSON file '${file}'";
260 0         0 binmode JSONFILE;
261 0         0 $openedFile = 1;
262 0         0 $raf = new File::RandomAccess(\*JSONFILE);
263             }
264 58         173 my $obj = ReadJSONObject($raf);
265 58 50       195 close JSONFILE if $openedFile;
266 58 50       180 unless (ref $obj eq 'ARRAY') {
267 58 50       173 ref $obj eq 'HASH' or return "Format error in JSON file '${file}'";
268 58         138 $obj = [ $obj ];
269             }
270 58         115 my ($info, $found);
271 58         149 foreach $info (@$obj) {
272 58 50       164 next unless ref $info eq 'HASH';
273             # fix "SourceFile" case, or assume '*' if SourceFile not specified
274 58 50       182 unless (defined $$info{SourceFile}) {
275 58         330 my ($key) = grep /^SourceFile$/i, keys %$info;
276 58 50       157 if ($key) {
277 0         0 $$info{SourceFile} = $$info{$key};
278 0         0 delete $$info{$key};
279             } else {
280 58         154 $$info{SourceFile} = '*';
281             }
282             }
283 58 100       191 if (defined $missingValue) {
284 1   100     18 $$info{$_} eq $missingValue and $$info{$_} = undef foreach keys %$info;
285             }
286 58         189 $$database{$$info{SourceFile}} = $info;
287 58         137 $found = 1;
288             }
289 58 50       272 return $found ? undef : "No valid JSON objects in '${file}'";
290             }
291              
292              
293             1; # end
294              
295             __END__