File Coverage

blib/lib/Image/MetaData/JPEG/parsers/app1_xmp.pl
Criterion Covered Total %
statement 140 195 71.7
branch 52 118 44.0
condition 13 25 52.0
subroutine 18 24 75.0
pod 0 17 0.0
total 223 379 58.8


line stmt bran cond sub pod time code
1             ###########################################################
2             # A Perl package for showing/modifying JPEG (meta)data. #
3             # Copyright (C) 2004,2005,2006 Stefano Bettelli #
4             # See the COPYING and LICENSE files for license terms. #
5             ###########################################################
6 15     15   67 use Image::MetaData::JPEG::data::Tables qw(:TagsAPP1_XMP);
  15         21  
  15         1736  
7 15     15   72 no integer;
  15         19  
  15         62  
8 15     15   261 use strict;
  15         22  
  15         376  
9 15     15   54 use warnings;
  15         18  
  15         1685  
10              
11             ###########################################################
12             # This method is the entry point for APP1 XMP segments. #
13             # Such APP1 segments are used by Adobe for recording an #
14             # XMP packet in JPEG files (this is a special XML block #
15             # storing metadata information, similarly to Exif APP1 or #
16             # IPTC APP13). The advantage of XMP is that it is exten- #
17             # sible and that it can be embedded in many file types, #
18             # like JPEG, PNG, GIF, HTML, PDF, PostScript, ecc... #
19             # Only the envelope changes. The format is the following: #
20             #---------------------------------------------------------#
21             # 29 bytes namespace = http://ns.adobe.com/xap/1.0/\000 #
22             # .... XMP packet (in some Unicode encoding) #
23             #=========================================================#
24             # First, check that the mandatory Adobe namespace string #
25             # is there. Then, parse the XML and save the intermediate #
26             # results. Last, Check that the XML block conforms to the #
27             # RDF and XMP specifications (issue an error otherwise). #
28             ###########################################################
29             # Ref: "XMP Specification", version 3.2, June 2005, Adobe #
30             # Systems Inc., San Jose, CA, http://www.adobe.com #
31             ###########################################################
32             sub parse_app1_xmp {
33 1     1 0 1 my ($this) = @_;
34             # slurp the segment as a single string
35 1         4 my $packet = $this->read_record($ASCII, 0, $this->size());
36             # get rid of newline chars
37 1         15 $packet =~ y/\n\r//d;
38             # the ID must be Adobe's namespace; die if it is not correct
39 1         64 $packet =~ s/^($APP1_XMP_TAG|.{0,15})(.*)$/$2/;
40 1 50       6 $this->die("Incorrect XMP namespace ($1)") unless $1 eq $APP1_XMP_TAG;
41 1         5 $this->store_record('NAMESPACE', $ASCII, \ "$1");
42             # (TODO): find the used Unicode encoding and deal with it
43 15     15   8325 use Encode; Encode::_utf8_on($packet);
  15         120304  
  15         31428  
  1         13  
44             # analyse the XML packet (this cannot fail)
45 1         6 $this->parse_xml_string(\ $packet); # writes into $this->{private_list}
46             #print join '::', @$_, "\n" for @{$this->{private_list}};
47             # check header (xpacket, x:x[am]pmeta and the outer rdf:RDF)
48 1         5 $this->test_xmp_header();
49             # test that XMP syntax is correct; [Dlist(ABOUT)] := [Desc(ABOUT)]+
50 1         5 $this->parse_rdf_description()
51             while $this->list_equal(['OPEN', 'rdf:Description']);
52             # cleanup
53 1         9 delete $this->{private_list};
54             }
55              
56             ###########################################################
57             # This private method runs a series of regular expression #
58             # match tests against the private list (starting at posi- #
59             # tion $offset). $regexps_array is either a reference to #
60             # a list of references to regexp rules, or a reference to #
61             # a single such list. A regexp rule consists of a list of #
62             # regular express.s and variables to assign submatches to.#
63             ###########################################################
64             sub list_equal {
65 50     50 0 46 my ($this, $regexps_array, $offset) = (@_, 0);
66             # convert a single rule into a list of rules
67 50 100       133 $regexps_array = [$regexps_array] unless ref $$regexps_array[0] eq 'ARRAY';
68             # check each rule separately, return as soon as possible
69 50         80 for my $pos ($offset..$offset + $#$regexps_array) {
70 52 100       91 return 0 unless exists $this->{private_list}->[$pos];
71             # do not modify the private list for the time being
72 51         39 my $elements = [ @{$this->{private_list}->[$pos]} ];
  51         87  
73 51         44 my $regexps = $regexps_array->[$pos];
74 51         36 while (@{$regexps}) {
  139         250  
75 102 50       139 return 0 unless @$elements;
76 102         106 my ($e, $r) = (shift(@$elements), shift(@$regexps));
77 102 100       964 my @matches = $e =~ /^$r$/; return 0 unless @matches;
  102         227  
78 88         177 ${shift @$regexps} = shift @matches while ref $$regexps[0]; } }
  19         43  
79 35         82 return 1 + $#$regexps_array; }
80              
81             ###########################################################
82             # This private method is almost the same as list_equal, #
83             # but, if the match is positive, it also removes matching #
84             # lines from the private list. #
85             ###########################################################
86             sub list_extract {
87 39     39 0 45 my ($this, $regexps_array, $offset, $number) = (@_, 0);
88 39   100     75 my $lines = $this->list_equal($regexps_array, $offset) || return 0;
89 29         24 splice @{$this->{private_list}}, $offset, $lines; return 1; }
  29         44  
  29         76  
90              
91             ###########################################################
92             # Private method for saving a piece of information into #
93             # the private list (always undefined type). Arguments are:#
94             # $pdir --> (list ref) identifies a subdirectory #
95             # $name --> of the Record to be saved #
96             # $value --> content to be saved in the Record #
97             # $extra --> optonal info for {extra} field of a Record #
98             ###########################################################
99             sub store_xmp_value {
100 10     10 0 15 my ($this, $pdir, $name, $value, $extra) = @_;
101 10         29 my $rec = $this->store_record
102             ($this->provide_subdirectory(@$pdir), $name, $UNDEF, \$value);
103 10 100       42 $rec->{extra} = $extra if $extra; }
104              
105             ###########################################################
106             # Private method for the extracting a list of attributes #
107             # and saving them in the private list; the arguments are: #
108             # $pdir --> (list ref) identifies a subdirectory #
109             # $regexp --> to match the attribute name against #
110             # $extra --> info for the {extra} field of a Record #
111             ###########################################################
112             sub extract_attributes {
113 5     5 0 8 my ($this, $pdir, $regexp, $extra) = @_; my ($name, $value, %summary)= ();
  5         7  
114 5         13 $this->store_xmp_value($pdir, $name, $value, $extra),
115             $summary{$name} = $value while $this->list_extract
116             (['ATTRIBUTE', $regexp, \$name, '(.*)', \$value]);
117 5         15 return \ %summary; }
118              
119             ###########################################################
120             # This private method parses a generic XML string and #
121             # writes its findings in an array of array references. #
122             # Each sublist in the main list starts with a sublist #
123             # type, which can be OPEN, OPEN_ABBR, OPEN_SPECIAL, #
124             # ATTRIBUTE, COMMENT, CONTENT or CLOSE. The parsing algo- #
125             # rithm is my current understanding of what XML is ..... #
126             # ------------------------------------------------------- #
127             # Spaces before a tag are not meaningful, but they cannot #
128             # be thrown away before textual values. Keeping track of #
129             # this condition is the reason for the $f flag. #
130             ###########################################################
131             sub parse_xml_string {
132 1     1 0 2 my ($this, $string) = @_;
133             # initialisation of this private, intermediate list
134 1 50       4 $this->{private_list} = [] unless exists $this->{private_list};
135             # some variables and their initialisation
136 1         4 my $mkp_tag = qr/[\w:-]+/o; my $spaces; my $f = 0;
  1         2  
  1         2  
137             # how to push a new list of strings onto the private list
138 1     29   6 my $lpush = sub { push @{$this->{private_list}}, [@_] };
  29         19  
  29         113  
139             # how to extract the attribute list of a tag
140 7     7   9 my $apush = sub { my ($p) = @_; &$lpush('ATTRIBUTE', $1, $3) while $p
  7         453  
141             =~ s/^\s*($mkp_tag)=([\'\"])([^\'\"]*)\2//o;
142 1 50       4 &$lpush('IMPOSSIBLE', $p) if $p; };
  7         12  
143 17         411 PARSE_LOOP:
144             # extract spaces at the beginning (they are important for content!)
145 17   100     55 $$string =~ s/^(\s*)//o; $spaces = $1 || '';
146             # try to speed regular expressions up by lookint at the
147             # first two characters of the current string
148 17 100       297 if (substr($$string, 0, 1) eq '<') {
149 15         15 my $s = substr($$string, 1, 1);
150             # extract a closing markup
151 15 100 66     525 if ($s eq '/' && $$string =~ s/^<\/($mkp_tag)>//o) {
    50 33        
    100 66        
    50          
152 6 50       13 &$lpush('CONTENT', $spaces) if $f; $f=0; &$lpush('CLOSE', $1); }
  6         6  
  6         10  
153             # extract a comment, if present ( )
154             elsif ($s eq '!' && $$string =~ s/^//o) {
155 0         0 &$lpush('COMMENT', $1); $f=0; }
  0         0  
156             # extract header tags ( ) + attributes
157             elsif ($s eq '?' && $$string =~ s/^<\?($mkp_tag) ?([^\?]*?)\?>//o) {
158 3 50       7 &$lpush('OPEN_SPECIAL', $1); &$apush($2) if $2; $f=0; }
  3         9  
  3         5  
159             # extract an opening markup with or without attributes
160             # extract also self-contained tags ( <.... /> ), (not closing)
161             elsif ($$string =~ s/^<($mkp_tag) ?([^\?]*?)(\/?)>//o) {
162 6 50       14 &$lpush($3 ? 'OPEN_ABBR' : 'OPEN', $1); &$apush($2) if $2;
  6 100       16  
163 6 50       10 $3 ? &$lpush ('CLOSE_ABBR') : $f = 1; }
164             # an impossible case
165 0 0       0 else { &$lpush('IMPOSSIBLE', $$string) if $string; $$string = ""; }
  0         0  
166             # extract content (spaces are important ...)
167 2         41 } else { $$string =~ s/^([^<]+)//o; &$lpush('CONTENT', $spaces.$1); $f=0; }
  2         6  
  2         2  
168             # parse the rest of the string
169 17 100       39 $$string ? goto PARSE_LOOP : return;
170             }
171              
172             ###########################################################
173             # Framework for the XMP packet. The packet content is #
174             # sandwiched between a header and a trailer, and may #
175             # contain padding whitespaces at the end. The 'xpacket' #
176             # header has two mandatory attributes, 'begin' and 'id' #
177             # (order is important), separated by exactly one space. #
178             # Attribute values, here and in the following, are enclo- #
179             # sed by single quotes or double quotes. The value of #
180             # 'begin' must be the Unicode "zero-width non-breaking #
181             # space" (U+FEFF); an empty value is also acceptable (for #
182             # backward compatibility), and means UTF-8. The value of #
183             # 'id' is fixed. Other attributes may be ignored. A pad- #
184             # ding of 2KB or 4KB, with a newline every 100 spaces, is #
185             # recommended. The 'end' attribute of the trailer may #
186             # have a value of "r" (read-only) or "w" (modifiable). #
187             # ------------------------------------------------------- #
188             # The structure of the packet content is as follows. #
189             # There is an optional x:xmpmeta (or x:xapmeta for older #
190             # files) element, with a mandatory xmlns:x attribute set #
191             # to "adobe:ns:meta/" and other optional attributes, #
192             # which can be ignored. Inside it (or at top level, if it #
193             # is absent), there is exactly one rdf:RDF element with #
194             # an attribute specifying the xmlns:rdf namespace (other #
195             # namespaces can be listed here as additional attributes).#
196             # Inside the 'rdf:RDF' element then, all XMP properties #
197             # are stored inside one or more rdf:Description element. #
198             # ------------------------------------------------------- #
199             # #
200             # #
201             # #
202             # [rdf:Description]+ #
203             # #
204             # #
205             # ... padding with XML whitespaces ... #
206             # #
207             ###########################################################
208             sub test_xmp_header {
209 1     1 0 3 my ($this) = @_;
210 1         2 my ($rw, $filter, $f1, $f2, $meta, $ns, $URI) = ();
211             # search for
212 1 50       6 $this->list_extract(['OPEN_SPECIAL', 'xpacket'])
213             || $this->die('XMP not starting with "xpacket"');
214 1 50       5 $this->list_extract(['ATTRIBUTE', 'begin', $APP1_XMP_XPACKET_BEGIN])
215             || $this->die('XMP xpacket-begin not zero-width Unicode space');
216 1 50       4 $this->list_extract(['ATTRIBUTE', 'id', $APP1_XMP_XPACKET_ID])
217             || $this->die('XMP xpacket-id not correct');
218             # extract all additional attributes in the opening tag
219 1         6 $this->extract_attributes(['XMP_HEADER'], '(.*)', 'xpacket');
220             # search for at the end
221 1 50       5 $this->list_extract(['ATTRIBUTE', 'end', '(w|r)', \$rw], -1)
222             || $this->die('XMP xpacket end attribute not found');
223 1 50       4 $this->list_extract(['OPEN_SPECIAL', 'xpacket'], -1) # OPEN, not CLOSE ...
224             || $this->die('XMP not ending with "xpacket"');
225 1         5 $this->store_xmp_value(['XMP_HEADER'], 'xpacket-rw', $rw);
226             # extract additional filters (are these undocumented?)
227 1         5 while ($this->list_extract(['OPEN_SPECIAL', '(.*)', \$filter])) {
228 1         5 $this->list_extract(['ATTRIBUTE', '(.*)', \$f1, '(.*)', \$f2]);
229 1         5 $this->store_xmp_value(['XMP_HEADER'], $filter, "$f1=\"$f2\""); }
230             # take care of the xmpmeta/xapmeta tags, if present
231 1 50       5 $this->list_extract(['OPEN', '(x:x[am]pmeta)', \$meta]) || goto NO_XMPMETA;
232 1         4 $this->store_xmp_value(['XMP_HEADER'], 'meta', $meta);
233 1 50       4 $this->list_extract(['CLOSE', $meta], -1)
234             || $this->die('XMP x:x[am]pmeta not closing');
235 1 50       5 $this->list_extract(['ATTRIBUTE', 'xmlns:x', $APP1_XMP_META_NS])
236             || $this->die('XMP x:x[am]pmeta without namespace');
237 1         5 $this->extract_attributes(['XMP_HEADER'], '(.*)', 'meta');
238 1 50       4 NO_XMPMETA:
239             # take care of the outer rdf:RDF and its namespace
240             $this->list_extract(['OPEN', 'rdf:RDF'])
241             || $this->die('Outer rdf:RDF not found');
242 1 50       5 $this->list_extract(['ATTRIBUTE', 'xmlns:rdf', $APP1_XMP_OUTER_RDF_NS])
243             || $this->die('Namespace not correct/found in outer rdf:RDF');
244 1 50       6 $this->list_extract(['CLOSE', 'rdf:RDF'], -1)
245             || $this->die('Outer rdf:RDF not closing');
246             # save additional namespaces if present (undocumented?)
247 1         5 $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)', 'rdf:RDF');
248             # extract all rdf:about and check that they are the same
249             # (sometimes 'rdf:' is missing, how should I treat this case?)
250 2         5 my @abouts = map { $$_[2] } grep { $$_[1] =~ /(rdf:|)about/ }
  4         14  
  14         16  
251 1         3 grep { $$_[0] eq 'ATTRIBUTE' } @{$this->{private_list}};
  1         2  
252 1 50       2 $this->die("Inconsistent rdf:about's") if grep { $_ ne $abouts[0]} @abouts;
  2         5  
253 1         4 $this->store_xmp_value(['XMP_HEADER'], 'rdf:about', $abouts[0]);
254             }
255              
256             ###########################################################
257             # Description elements: rdf:Description elements and XMP #
258             # schemas are usually in one-to-one correspondence. Each #
259             # element has two mandatory attributes, 'rdf:about' and #
260             # 'xmlns:NAME'. 'rdf:about' is usually empty (however, it #
261             # can contain an application specific URI), and its value #
262             # *must* be shared among all rdf:Description elements. #
263             # 'xmlns:NAME' specifies the local namespace prefix (NAME #
264             # stands for the actual prefix). Additional namespaces #
265             # can be specified via 'xmlns' attributes. #
266             # ------------------------------------------------------- #
267             # [rdf:Description] :=
268             # xmlns:NAME='text' ..ns..> #
269             # [property(NAME)]+ #
270             # #
271             # ------------------------------------------------------- #
272             # There exists also an abbreviated form where properties #
273             # are listed as attributes of the rdf:Description tag (in #
274             # this case there is no closing rdf:Description> tag, and #
275             # the opening tags ends with the '/' character). #
276             # ------------------------------------------------------- #
277             # [rdf:Description] :=
278             # xmlns:NAME='text' [inlineP(NAME)]+/> #
279             # [inlineP(NAME)] := "NAME:name='value'" #
280             ###########################################################
281             sub parse_rdf_description {
282 2     2 0 3 my ($this) = @_; my ($type, $ns) = ();
  2         3  
283             # extract description opening ($type is OPEN or OPEN_ABBR)
284 2 50       6 $this->list_extract(['(OPEN.*)', \$type, 'rdf:Description']) ||
285             $this->die('first-level rdf:Description opening tag not found');
286             # mandatory rdf:about attribute (its value is already checked)
287 2 50       7 $this->list_extract(['ATTRIBUTE', '(rdf:|)about', '.*'])
288             || $this->die('rdf:about failure (missing or inconsistent)');
289             # mandatory main namespace in xmlns:abbreviation
290 2 50       8 $this->list_equal(['ATTRIBUTE', 'xmlns:.*', '.*'])
291             || $this->die('rdf:Description namespace not found');
292             # extract all additional namespaces (and find the secondary one)
293             # the exact meaning of this operation is to be clarified (TODO)
294 2         5 my $nss = $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)');
295 2 50 33     7 do { $ns = $_ if $$nss{$_}!~ /\#$/ && ! defined $ns } for keys %$nss;
  2         16  
296             # if $type is OPEN_ABBR, all simple properties are attributes
297 2 50       5 $this->extract_attributes(['PROPERTIES'], '(.*)', 'abbr'), return
298             if $type eq 'OPEN_ABBR';
299             # some rdf:Description's are there only as placeholders (only empty
300             # content) --> do not try to extract properties in this case. In
301             # the general case, parse all properties in this rdf:Description
302 2 50       5 unless ($this->list_extract(['CONTENT', '\s*'])) {
303 2         7 $this->parse_rdf_property($ns, ['PROPERTIES'])
304             while ! $this->list_equal(['CLOSE', 'rdf:Description']); }
305             # parse the close tag of rdf:Description
306 2 50       7 $this->list_extract(['CLOSE', 'rdf:Description'])
307             || $this->die('first-level rdf:Description closing tag not found');
308 2         8 1 }
309              
310             ###########################################################
311             # This private method is a dispatcher for the abstract #
312             # concept of XMP property. Actual properties are either #
313             # simple or structured or they are array properties. #
314             # ------------------------------------------------------- #
315             # [property(NAME)] := [simpleP(NAME)] #
316             # or [structuredP(NAME)] #
317             # or [arrayP(NAME)] #
318             ###########################################################
319             sub parse_rdf_property {
320 2     2 0 4 my ($this, $ns, $pdir) = @_;
321 2 0 33     6 $this->parse_comment ($ns, $pdir) ||
      33        
      33        
322             $this->parse_rdf_simple_property($ns, $pdir) ||
323             $this->parse_rdf_struct_property($ns, $pdir) ||
324             $this->parse_rdf_array_property ($ns, $pdir) ||
325             $this->die('parse_rdf_property: unhandled case');
326 2         8 1 }
327              
328             ###########################################################
329             # Comments: this is undocumented in the XMP manual by #
330             # Adobe, but there is evidence that some properties may #
331             # be replaced by a comment, usually carrying its name. #
332             # ------------------------------------------------------- #
333             # [comment] := #
334             ###########################################################
335             sub parse_comment {
336 2     2 0 2 my ($this, $ns, $pdir) = @_; my $comment = '';
  2         2  
337 2 50       5 return 0 unless $this->list_extract(['COMMENT', '(.*)', \$comment]);
338 0         0 $this->store_xmp_value($pdir, "$ns:COMMENT", $comment);
339 0         0 1 }
340              
341             ###########################################################
342             # Simple properties: a simple property is usually just #
343             # some literal value between opening and closing tags #
344             # carrying the property name; it can have qualifiers #
345             # (attributes). Just to make things easier, it seems that #
346             # there is the (undocumented) possibility of replacing #
347             # the property value (text) with a sequence of general #
348             # properties (i.e., a clone of a structured property ...) #
349             # ------------------------------------------------------- #
350             # [simpleP(NAME)] := text
351             # or [property(name)]+
352             # [qualifier] := "name:pnam='text'" #
353             ###########################################################
354             sub parse_rdf_simple_property {
355 2     2 0 3 my ($this, $ns, $pdir) = @_; my ($name, $n, $content, $v) = ();
  2         3  
356             # try to match structure and return on failure; indeed, it
357             # is difficult to "match" a simple property, so, we try to
358             # exclude all other cases here ...
359 2 50       6 return 0 if $this->list_equal([['OPEN', '.*'], ['OPEN', 'rdf:.*']]);
360             # extract the opening tag with the property name
361 2 50       9 $this->list_extract(['OPEN', "($ns:.*)", \$name])
362             || $this->die('simple property: error at opening tag');
363             # property qualifiers not yet supported yet!! (TODO)
364             # case I: the value is simply text
365 2 50       8 if ($this->list_extract(['CONTENT', '(.*)', \$content])) {
366 2         16 $this->store_xmp_value($pdir, $name, $content); }
367             # case II: the "value" is a sequence of properties
368             # this is to be clarified .... (TODO)
369 0         0 else { push @$pdir, $name;
370 0         0 $this->extract_attributes($pdir, '(.*)', 'ATTRIBUTE');
371 0         0 $this->store_xmp_value($pdir, 'CONTENT', $v)
372             while $this->list_extract(['CONTENT', '(.*)', \$v]);
373 0         0 $this->parse_rdf_simple_property($ns, $pdir)
374             while ! $this->list_equal(['CLOSE', "$name"]);
375 0         0 pop @$pdir; }
376             # closing tag
377 2 50       9 $this->list_extract(['CLOSE', "$name"])
378             || $this->die('simple property: error at closing tag');
379 2         10 1 }
380              
381             ###########################################################
382             # Structured properties: agglomerates of properties of #
383             # different type. The inner properties are stored inside #
384             # a secondary rdf:Description tag, which also contains a #
385             # secondary namespace definition, to be used by inner #
386             # properties. I hope this is all. #
387             # ------------------------------------------------------- #
388             # [structuredP(NAME)] := #
389             #
390             # [property(N2)]+ #
391             # #
392             # #
393             ###########################################################
394             sub parse_rdf_struct_property {
395 0     0 0   my ($this, $ns, $pdir) = @_; my ($name, $ns_2, $ns_2_v) = ();
  0            
396             # try to match structure and return on failure
397 0 0         return 0 unless $this->list_extract
398             (['OPEN', "$ns:(.*)", \$name], ['OPEN', 'rdf:Description'],
399             ['ATTRIBUTE', 'xmlns:(.*)', \$ns_2, '(.*)', \$ns_2_v]);
400             # store the property content
401 0           $this->store_xmp_value(['SCHEMAS'], $ns_2, $ns_2_v);
402             # get all embedded properties
403 0           $this->parse_rdf_property($ns_2, [@$pdir, $name])
404             while ! $this->list_equal(['CLOSE', $name]);
405             # find where tags are closing
406 0 0         $this->list_extract(['CLOSE', $name])
407             || $this->die('structured property: error at closing tag');
408 0           1 }
409              
410             ###########################################################
411             # Array properties: rdf:Seq is for an ordered list of #
412             # properties, rdf:Bag for an unordered set of properties #
413             # and rdf:Alt for a list of alternatives. Items are most #
414             # often homogeneous, but this is not a rule. There is a #
415             # namespace problem for qualified items (TODO) #
416             # ------------------------------------------------------- #
417             # [arrayP(NAME)] := #
418             # #
419             # [item]+ #
420             # #
421             # #
422             # [item] := [simple_item] or [prop_item] or #
423             # [qualif_item(N2)] or [lang_item] #
424             # ------------------------------------------------------- #
425             # Note: a [lang_item] can be found only in an rdf:Alt, #
426             # and this rdf:Alt must in turn contain only [lang_item] #
427             # items, but this check is not yet implemented (TODO). #
428             ###########################################################
429             sub parse_rdf_array_property {
430 0     0 0   my ($this, $ns, $pdir) = @_; my ($name, $type) = ();
  0            
431             # try to match structure and return on failure
432 0 0         return 0 unless $this->list_extract
433             ([['OPEN',"($ns:.*)",\$name], ['OPEN','(rdf:(Bag|Seq|Alt))',\$type]]);
434             # get all items in this array property
435 0           while (! $this->list_equal(['CLOSE', $type])) {
436 0 0         $this->parse_rdf_item ([@$pdir, $name]) && next;
437 0 0         $this->parse_rdf_item_lang ([@$pdir, $name]) && next;
438 0 0         $this->parse_rdf_item_property ([@$pdir, $name]) && next;
439 0 0         $this->parse_rdf_item_qualified([@$pdir, $name]) && next;
440 0           $this->die('parse_rdf_array_property: unhandled case'); }
441             # store the property type in the subdirectory
442 0           $this->search_record(@$pdir, $name)->{extra} = $type;
443             # find where tags are closing
444 0 0         $this->list_extract([['CLOSE', $type], ['CLOSE', "$name"]])
445             || $this->die('array property: error at closing tag');
446 0           1 }
447              
448             ###########################################################
449             # Simple items: just text strings inside rdf:li tags. It #
450             # is the simplest case for rdf:Bag, rdf:Set and rdf:Alt #
451             # array properties. It does not need a subdirectory. #
452             # ------------------------------------------------------- #
453             # [simple_item] := text #
454             ###########################################################
455             sub parse_rdf_item {
456 0     0 0   my ($this, $pdir) = @_; my ($content) = ();
  0            
457             # try to match structure and return on failure
458 0 0         return 0 unless $this->list_extract
459             ([['OPEN','rdf:li'],['CONTENT','(.*)',\$content],['CLOSE','rdf:li']]);
460             # store the property content
461 0           $this->store_xmp_value($pdir, 'ITEM', $content);
462 0           1 }
463              
464             ###########################################################
465             # Property items: these items contain another property #
466             # which is not simple text, e.g., a structured property #
467             # or an array property. Additional qualifiers can be spe- #
468             # cified as attributes of the rdf:li tag. Such properties #
469             # in general require their own subdirectories. #
470             # ------------------------------------------------------- #
471             # [prop_item] := [simplP(NAME)]
472             ###########################################################
473             sub parse_rdf_item_property {
474 0     0 0   my ($this, $pdir) = @_; my ($name, $value) = ();
  0            
475             # try to match structure and return on failure
476 0 0         return 0 unless $this->list_equal
477             ([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'rdf:.*', '.*'], ['OPEN', '.*']]);
478 0           $this->list_extract([['OPEN', 'rdf:li'],
479             ['ATTRIBUTE', '(rdf:.*)', \$name, '(.*)', \$value]]);
480             # store the property content
481 0           $this->store_xmp_value([@$pdir, 'ITEM'], $name, $value, 'QUALIFIER');
482             # this is plainly wrong: how to extract the correct namespace? TODO
483 0           $this->parse_rdf_property('stJob', [@$pdir, 'ITEM']);
484 0 0         $this->list_extract(['CLOSE', 'rdf:li'])
485             || $this->die('item_property: error at closing tag');
486 0           1 }
487              
488             ###########################################################
489             # Qualified items: these items can be found inside an #
490             # array property ('Bag', 'Seq' or 'Alt') and differ from #
491             # standard items because they do not only have a value, #
492             # but also one or more "qualifiers"; they remain unnamed, #
493             # however. The namespace of the qualifiers can be diffe- #
494             # rent from the main namespace, but this is not yet taken #
495             # into account (TODO). #
496             # ------------------------------------------------------- #
497             # [qualif_item(N2)] := #
498             # #
499             # text #
500             # [qualifier(N2)]* #
501             # #
502             # #
503             # [qualifier(N2)] := text #
504             ###########################################################
505             sub parse_rdf_item_qualified {
506 0     0 0   my ($this, $pdir) = @_; my ($name, $value) = ('qualified-ITEM');
  0            
507             # try to match structure and return on failure
508 0 0         return 0 unless $this->list_extract
509             ([['OPEN','rdf:li'], ['OPEN','rdf:Description'], ['OPEN','rdf:value'],
510             ['CONTENT', '(.*)', \$value], ['CLOSE', 'rdf:value']]);
511             # store the qualified property value, then all qualifiers;
512             # we need a new subdirectory to store all this stuff
513 0           $this->store_xmp_value([@$pdir, $name], 'ITEM', $value);
514 0           1 while $this->parse_rdf_simple_property('.*', [@$pdir, $name]);
515             # find where tags are closing
516 0 0         $this->list_extract([['CLOSE', 'rdf:Description'], ['CLOSE', 'rdf:li']])
517             || $this->die('item_qualified: error at closing tag');
518 0           1 }
519              
520             ###########################################################
521             # Language alternatives: these are items inside an 'Alt' #
522             # array properties. It should not be possible to mix #
523             # language alternatives and normal items, but this is not #
524             # currently checked (TODO ?) #
525             # ------------------------------------------------------- #
526             # [lang_item] := text #
527             ###########################################################
528             sub parse_rdf_item_lang {
529 0     0 0   my ($this, $pdir) = @_; my ($language, $content) = ();
  0            
530             # try to match structure and return on failure
531 0 0         return 0 unless $this->list_extract
532             ([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'xml:lang', '(.*)', \$language],
533             ['CONTENT', '(.*)', \$content], ['CLOSE', 'rdf:li']]);
534             # store the property content
535 0           $this->store_xmp_value($pdir, $language, $content, 'lang-alt');
536 0           1 }
537              
538             # successful load
539             1;