File Coverage

blib/lib/Image/MetaData/JPEG/parsers/app1_xmp.pl

Criterion	Covered	Total	%
statement	140	195	71.7
branch	52	118	44.0
condition	13	25	52.0
subroutine	18	24	75.0
pod	0	17	0.0
total	223	379	58.8

line	stmt	bran	cond	sub	pod	time	code
1							###########################################################
2							# A Perl package for showing/modifying JPEG (meta)data. #
3							# Copyright (C) 2004,2005,2006 Stefano Bettelli #
4							# See the COPYING and LICENSE files for license terms. #
5							###########################################################
6	15			15		67	use Image::MetaData::JPEG::data::Tables qw(:TagsAPP1_XMP);
	15					21
	15					1736
7	15			15		72	no integer;
	15					19
	15					62
8	15			15		261	use strict;
	15					22
	15					376
9	15			15		54	use warnings;
	15					18
	15					1685
10
11							###########################################################
12							# This method is the entry point for APP1 XMP segments. #
13							# Such APP1 segments are used by Adobe for recording an #
14							# XMP packet in JPEG files (this is a special XML block #
15							# storing metadata information, similarly to Exif APP1 or #
16							# IPTC APP13). The advantage of XMP is that it is exten- #
17							# sible and that it can be embedded in many file types, #
18							# like JPEG, PNG, GIF, HTML, PDF, PostScript, ecc... #
19							# Only the envelope changes. The format is the following: #
20							#---------------------------------------------------------#
21							# 29 bytes namespace = http://ns.adobe.com/xap/1.0/\000 #
22							# .... XMP packet (in some Unicode encoding) #
23							#=========================================================#
24							# First, check that the mandatory Adobe namespace string #
25							# is there. Then, parse the XML and save the intermediate #
26							# results. Last, Check that the XML block conforms to the #
27							# RDF and XMP specifications (issue an error otherwise). #
28							###########################################################
29							# Ref: "XMP Specification", version 3.2, June 2005, Adobe #
30							# Systems Inc., San Jose, CA, http://www.adobe.com #
31							###########################################################
32							sub parse_app1_xmp {
33	1			1	0	1	my ($this) = @_;
34							# slurp the segment as a single string
35	1					4	my $packet = $this->read_record($ASCII, 0, $this->size());
36							# get rid of newline chars
37	1					15	$packet =~ y/\n\r//d;
38							# the ID must be Adobe's namespace; die if it is not correct
39	1					64	$packet =~ s/^($APP1_XMP_TAG\|.{0,15})(.*)$/$2/;
40	1	50				6	$this->die("Incorrect XMP namespace ($1)") unless $1 eq $APP1_XMP_TAG;
41	1					5	$this->store_record('NAMESPACE', $ASCII, \ "$1");
42							# (TODO): find the used Unicode encoding and deal with it
43	15			15		8325	use Encode; Encode::_utf8_on($packet);
	15					120304
	15					31428
	1					13
44							# analyse the XML packet (this cannot fail)
45	1					6	$this->parse_xml_string(\ $packet); # writes into $this->{private_list}
46							#print join '::', @$_, "\n" for @{$this->{private_list}};
47							# check header (xpacket, x:x[am]pmeta and the outer rdf:RDF)
48	1					5	$this->test_xmp_header();
49							# test that XMP syntax is correct; [Dlist(ABOUT)] := [Desc(ABOUT)]+
50	1					5	$this->parse_rdf_description()
51							while $this->list_equal(['OPEN', 'rdf:Description']);
52							# cleanup
53	1					9	delete $this->{private_list};
54							}
55
56							###########################################################
57							# This private method runs a series of regular expression #
58							# match tests against the private list (starting at posi- #
59							# tion $offset). $regexps_array is either a reference to #
60							# a list of references to regexp rules, or a reference to #
61							# a single such list. A regexp rule consists of a list of #
62							# regular express.s and variables to assign submatches to.#
63							###########################################################
64							sub list_equal {
65	50			50	0	46	my ($this, $regexps_array, $offset) = (@_, 0);
66							# convert a single rule into a list of rules
67	50	100				133	$regexps_array = [$regexps_array] unless ref $$regexps_array[0] eq 'ARRAY';
68							# check each rule separately, return as soon as possible
69	50					80	for my $pos ($offset..$offset + $#$regexps_array) {
70	52	100				91	return 0 unless exists $this->{private_list}->[$pos];
71							# do not modify the private list for the time being
72	51					39	my $elements = [ @{$this->{private_list}->[$pos]} ];
	51					87
73	51					44	my $regexps = $regexps_array->[$pos];
74	51					36	while (@{$regexps}) {
	139					250
75	102	50				139	return 0 unless @$elements;
76	102					106	my ($e, $r) = (shift(@$elements), shift(@$regexps));
77	102	100				964	my @matches = $e =~ /^$r$/; return 0 unless @matches;
	102					227
78	88					177	${shift @$regexps} = shift @matches while ref $$regexps[0]; } }
	19					43
79	35					82	return 1 + $#$regexps_array; }
80
81							###########################################################
82							# This private method is almost the same as list_equal, #
83							# but, if the match is positive, it also removes matching #
84							# lines from the private list. #
85							###########################################################
86							sub list_extract {
87	39			39	0	45	my ($this, $regexps_array, $offset, $number) = (@_, 0);
88	39		100			75	my $lines = $this->list_equal($regexps_array, $offset) \|\| return 0;
89	29					24	splice @{$this->{private_list}}, $offset, $lines; return 1; }
	29					44
	29					76
90
91							###########################################################
92							# Private method for saving a piece of information into #
93							# the private list (always undefined type). Arguments are:#
94							# $pdir --> (list ref) identifies a subdirectory #
95							# $name --> of the Record to be saved #
96							# $value --> content to be saved in the Record #
97							# $extra --> optonal info for {extra} field of a Record #
98							###########################################################
99							sub store_xmp_value {
100	10			10	0	15	my ($this, $pdir, $name, $value, $extra) = @_;
101	10					29	my $rec = $this->store_record
102							($this->provide_subdirectory(@$pdir), $name, $UNDEF, \$value);
103	10	100				42	$rec->{extra} = $extra if $extra; }
104
105							###########################################################
106							# Private method for the extracting a list of attributes #
107							# and saving them in the private list; the arguments are: #
108							# $pdir --> (list ref) identifies a subdirectory #
109							# $regexp --> to match the attribute name against #
110							# $extra --> info for the {extra} field of a Record #
111							###########################################################
112							sub extract_attributes {
113	5			5	0	8	my ($this, $pdir, $regexp, $extra) = @_; my ($name, $value, %summary)= ();
	5					7
114	5					13	$this->store_xmp_value($pdir, $name, $value, $extra),
115							$summary{$name} = $value while $this->list_extract
116							(['ATTRIBUTE', $regexp, \$name, '(.*)', \$value]);
117	5					15	return \ %summary; }
118
119							###########################################################
120							# This private method parses a generic XML string and #
121							# writes its findings in an array of array references. #
122							# Each sublist in the main list starts with a sublist #
123							# type, which can be OPEN, OPEN_ABBR, OPEN_SPECIAL, #
124							# ATTRIBUTE, COMMENT, CONTENT or CLOSE. The parsing algo- #
125							# rithm is my current understanding of what XML is ..... #
126							# ------------------------------------------------------- #
127							# Spaces before a tag are not meaningful, but they cannot #
128							# be thrown away before textual values. Keeping track of #
129							# this condition is the reason for the $f flag. #
130							###########################################################
131							sub parse_xml_string {
132	1			1	0	2	my ($this, $string) = @_;
133							# initialisation of this private, intermediate list
134	1	50				4	$this->{private_list} = [] unless exists $this->{private_list};
135							# some variables and their initialisation
136	1					4	my $mkp_tag = qr/[\w:-]+/o; my $spaces; my $f = 0;
	1					2
	1					2
137							# how to push a new list of strings onto the private list
138	1			29		6	my $lpush = sub { push @{$this->{private_list}}, [@_] };
	29					19
	29					113
139							# how to extract the attribute list of a tag
140	7			7		9	my $apush = sub { my ($p) = @_; &$lpush('ATTRIBUTE', $1, $3) while $p
	7					453
141							=~ s/^\s($mkp_tag)=([\'\"])([^\'\"])\2//o;
142	1	50				4	&$lpush('IMPOSSIBLE', $p) if $p; };
	7					12
143	17					411	PARSE_LOOP:
144							# extract spaces at the beginning (they are important for content!)
145	17		100			55	$$string =~ s/^(\s*)//o; $spaces = $1 \|\| '';
146							# try to speed regular expressions up by lookint at the
147							# first two characters of the current string
148	17	100				297	if (substr($$string, 0, 1) eq '<') {
149	15					15	my $s = substr($$string, 1, 1);
150							# extract a closing markup
151	15	100	66			525	if ($s eq '/' && $$string =~ s/^<\/($mkp_tag)>//o) {
		50	33
		100	66
		50
152	6	50				13	&$lpush('CONTENT', $spaces) if $f; $f=0; &$lpush('CLOSE', $1); }
	6					6
	6					10
153							# extract a comment, if present ( )
154							elsif ($s eq '!' && $$string =~ s/^//o) {
155	0					0	&$lpush('COMMENT', $1); $f=0; }
	0					0
156							# extract header tags ( ) + attributes
157							elsif ($s eq '?' && $$string =~ s/^<\?($mkp_tag) ?([^\?]*?)\?>//o) {
158	3	50				7	&$lpush('OPEN_SPECIAL', $1); &$apush($2) if $2; $f=0; }
	3					9
	3					5
159							# extract an opening markup with or without attributes
160							# extract also self-contained tags ( <.... /> ), (not closing)
161							elsif ($$string =~ s/^<($mkp_tag) ?([^\?]*?)(\/?)>//o) {
162	6	50				14	&$lpush($3 ? 'OPEN_ABBR' : 'OPEN', $1); &$apush($2) if $2;
	6	100				16
163	6	50				10	$3 ? &$lpush ('CLOSE_ABBR') : $f = 1; }
164							# an impossible case
165	0	0				0	else { &$lpush('IMPOSSIBLE', $$string) if $string; $$string = ""; }
	0					0
166							# extract content (spaces are important ...)
167	2					41	} else { $$string =~ s/^([^<]+)//o; &$lpush('CONTENT', $spaces.$1); $f=0; }
	2					6
	2					2
168							# parse the rest of the string
169	17	100				39	$$string ? goto PARSE_LOOP : return;
170							}
171
172							###########################################################
173							# Framework for the XMP packet. The packet content is #
174							# sandwiched between a header and a trailer, and may #
175							# contain padding whitespaces at the end. The 'xpacket' #
176							# header has two mandatory attributes, 'begin' and 'id' #
177							# (order is important), separated by exactly one space. #
178							# Attribute values, here and in the following, are enclo- #
179							# sed by single quotes or double quotes. The value of #
180							# 'begin' must be the Unicode "zero-width non-breaking #
181							# space" (U+FEFF); an empty value is also acceptable (for #
182							# backward compatibility), and means UTF-8. The value of #
183							# 'id' is fixed. Other attributes may be ignored. A pad- #
184							# ding of 2KB or 4KB, with a newline every 100 spaces, is #
185							# recommended. The 'end' attribute of the trailer may #
186							# have a value of "r" (read-only) or "w" (modifiable). #
187							# ------------------------------------------------------- #
188							# The structure of the packet content is as follows. #
189							# There is an optional x:xmpmeta (or x:xapmeta for older #
190							# files) element, with a mandatory xmlns:x attribute set #
191							# to "adobe:ns:meta/" and other optional attributes, #
192							# which can be ignored. Inside it (or at top level, if it #
193							# is absent), there is exactly one rdf:RDF element with #
194							# an attribute specifying the xmlns:rdf namespace (other #
195							# namespaces can be listed here as additional attributes).#
196							# Inside the 'rdf:RDF' element then, all XMP properties #
197							# are stored inside one or more rdf:Description element. #
198							# ------------------------------------------------------- #
199							# #
200							# #
201							# #
202							# [rdf:Description]+ #
203							# #
204							# #
205							# ... padding with XML whitespaces ... #
206							# #
207							###########################################################
208							sub test_xmp_header {
209	1			1	0	3	my ($this) = @_;
210	1					2	my ($rw, $filter, $f1, $f2, $meta, $ns, $URI) = ();
211							# search for
212	1	50				6	$this->list_extract(['OPEN_SPECIAL', 'xpacket'])
213							\|\| $this->die('XMP not starting with "xpacket"');
214	1	50				5	$this->list_extract(['ATTRIBUTE', 'begin', $APP1_XMP_XPACKET_BEGIN])
215							\|\| $this->die('XMP xpacket-begin not zero-width Unicode space');
216	1	50				4	$this->list_extract(['ATTRIBUTE', 'id', $APP1_XMP_XPACKET_ID])
217							\|\| $this->die('XMP xpacket-id not correct');
218							# extract all additional attributes in the opening tag
219	1					6	$this->extract_attributes(['XMP_HEADER'], '(.*)', 'xpacket');
220							# search for at the end
221	1	50				5	$this->list_extract(['ATTRIBUTE', 'end', '(w\|r)', \$rw], -1)
222							\|\| $this->die('XMP xpacket end attribute not found');
223	1	50				4	$this->list_extract(['OPEN_SPECIAL', 'xpacket'], -1) # OPEN, not CLOSE ...
224							\|\| $this->die('XMP not ending with "xpacket"');
225	1					5	$this->store_xmp_value(['XMP_HEADER'], 'xpacket-rw', $rw);
226							# extract additional filters (are these undocumented?)
227	1					5	while ($this->list_extract(['OPEN_SPECIAL', '(.*)', \$filter])) {
228	1					5	$this->list_extract(['ATTRIBUTE', '(.)', \$f1, '(.)', \$f2]);
229	1					5	$this->store_xmp_value(['XMP_HEADER'], $filter, "$f1=\"$f2\""); }
230							# take care of the xmpmeta/xapmeta tags, if present
231	1	50				5	$this->list_extract(['OPEN', '(x:x[am]pmeta)', \$meta]) \|\| goto NO_XMPMETA;
232	1					4	$this->store_xmp_value(['XMP_HEADER'], 'meta', $meta);
233	1	50				4	$this->list_extract(['CLOSE', $meta], -1)
234							\|\| $this->die('XMP x:x[am]pmeta not closing');
235	1	50				5	$this->list_extract(['ATTRIBUTE', 'xmlns:x', $APP1_XMP_META_NS])
236							\|\| $this->die('XMP x:x[am]pmeta without namespace');
237	1					5	$this->extract_attributes(['XMP_HEADER'], '(.*)', 'meta');
238	1	50				4	NO_XMPMETA:
239							# take care of the outer rdf:RDF and its namespace
240							$this->list_extract(['OPEN', 'rdf:RDF'])
241							\|\| $this->die('Outer rdf:RDF not found');
242	1	50				5	$this->list_extract(['ATTRIBUTE', 'xmlns:rdf', $APP1_XMP_OUTER_RDF_NS])
243							\|\| $this->die('Namespace not correct/found in outer rdf:RDF');
244	1	50				6	$this->list_extract(['CLOSE', 'rdf:RDF'], -1)
245							\|\| $this->die('Outer rdf:RDF not closing');
246							# save additional namespaces if present (undocumented?)
247	1					5	$this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)', 'rdf:RDF');
248							# extract all rdf:about and check that they are the same
249							# (sometimes 'rdf:' is missing, how should I treat this case?)
250	2					5	my @abouts = map { $$_[2] } grep { $$_[1] =~ /(rdf:\|)about/ }
	4					14
	14					16
251	1					3	grep { $$_[0] eq 'ATTRIBUTE' } @{$this->{private_list}};
	1					2
252	1	50				2	$this->die("Inconsistent rdf:about's") if grep { $_ ne $abouts[0]} @abouts;
	2					5
253	1					4	$this->store_xmp_value(['XMP_HEADER'], 'rdf:about', $abouts[0]);
254							}
255
256							###########################################################
257							# Description elements: rdf:Description elements and XMP #
258							# schemas are usually in one-to-one correspondence. Each #
259							# element has two mandatory attributes, 'rdf:about' and #
260							# 'xmlns:NAME'. 'rdf:about' is usually empty (however, it #
261							# can contain an application specific URI), and its value #
262							# must be shared among all rdf:Description elements. #
263							# 'xmlns:NAME' specifies the local namespace prefix (NAME #
264							# stands for the actual prefix). Additional namespaces #
265							# can be specified via 'xmlns' attributes. #
266							# ------------------------------------------------------- #
267							# [rdf:Description] :=
268							# xmlns:NAME='text' ..ns..> #
269							# [property(NAME)]+ #
270							# #
271							# ------------------------------------------------------- #
272							# There exists also an abbreviated form where properties #
273							# are listed as attributes of the rdf:Description tag (in #
274							# this case there is no closing rdf:Description> tag, and #
275							# the opening tags ends with the '/' character). #
276							# ------------------------------------------------------- #
277							# [rdf:Description] :=
278							# xmlns:NAME='text' [inlineP(NAME)]+/> #
279							# [inlineP(NAME)] := "NAME:name='value'" #
280							###########################################################
281							sub parse_rdf_description {
282	2			2	0	3	my ($this) = @_; my ($type, $ns) = ();
	2					3
283							# extract description opening ($type is OPEN or OPEN_ABBR)
284	2	50				6	$this->list_extract(['(OPEN.*)', \$type, 'rdf:Description']) \|\|
285							$this->die('first-level rdf:Description opening tag not found');
286							# mandatory rdf:about attribute (its value is already checked)
287	2	50				7	$this->list_extract(['ATTRIBUTE', '(rdf:\|)about', '.*'])
288							\|\| $this->die('rdf:about failure (missing or inconsistent)');
289							# mandatory main namespace in xmlns:abbreviation
290	2	50				8	$this->list_equal(['ATTRIBUTE', 'xmlns:.', '.'])
291							\|\| $this->die('rdf:Description namespace not found');
292							# extract all additional namespaces (and find the secondary one)
293							# the exact meaning of this operation is to be clarified (TODO)
294	2					5	my $nss = $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)');
295	2	50	33			7	do { $ns = $_ if $$nss{$_}!~ /\#$/ && ! defined $ns } for keys %$nss;
	2					16
296							# if $type is OPEN_ABBR, all simple properties are attributes
297	2	50				5	$this->extract_attributes(['PROPERTIES'], '(.*)', 'abbr'), return
298							if $type eq 'OPEN_ABBR';
299							# some rdf:Description's are there only as placeholders (only empty
300							# content) --> do not try to extract properties in this case. In
301							# the general case, parse all properties in this rdf:Description
302	2	50				5	unless ($this->list_extract(['CONTENT', '\s*'])) {
303	2					7	$this->parse_rdf_property($ns, ['PROPERTIES'])
304							while ! $this->list_equal(['CLOSE', 'rdf:Description']); }
305							# parse the close tag of rdf:Description
306	2	50				7	$this->list_extract(['CLOSE', 'rdf:Description'])
307							\|\| $this->die('first-level rdf:Description closing tag not found');
308	2					8	1 }
309
310							###########################################################
311							# This private method is a dispatcher for the abstract #
312							# concept of XMP property. Actual properties are either #
313							# simple or structured or they are array properties. #
314							# ------------------------------------------------------- #
315							# [property(NAME)] := [simpleP(NAME)] #
316							# or [structuredP(NAME)] #
317							# or [arrayP(NAME)] #
318							###########################################################
319							sub parse_rdf_property {
320	2			2	0	4	my ($this, $ns, $pdir) = @_;
321	2	0	33			6	$this->parse_comment ($ns, $pdir) \|\|
			33
			33
322							$this->parse_rdf_simple_property($ns, $pdir) \|\|
323							$this->parse_rdf_struct_property($ns, $pdir) \|\|
324							$this->parse_rdf_array_property ($ns, $pdir) \|\|
325							$this->die('parse_rdf_property: unhandled case');
326	2					8	1 }
327
328							###########################################################
329							# Comments: this is undocumented in the XMP manual by #
330							# Adobe, but there is evidence that some properties may #
331							# be replaced by a comment, usually carrying its name. #
332							# ------------------------------------------------------- #
333							# [comment] := #
334							###########################################################
335							sub parse_comment {
336	2			2	0	2	my ($this, $ns, $pdir) = @_; my $comment = '';
	2					2
337	2	50				5	return 0 unless $this->list_extract(['COMMENT', '(.*)', \$comment]);
338	0					0	$this->store_xmp_value($pdir, "$ns:COMMENT", $comment);
339	0					0	1 }
340
341							###########################################################
342							# Simple properties: a simple property is usually just #
343							# some literal value between opening and closing tags #
344							# carrying the property name; it can have qualifiers #
345							# (attributes). Just to make things easier, it seems that #
346							# there is the (undocumented) possibility of replacing #
347							# the property value (text) with a sequence of general #
348							# properties (i.e., a clone of a structured property ...) #
349							# ------------------------------------------------------- #
350							# [simpleP(NAME)] := text
351							# or [property(name)]+
352							# [qualifier] := "name:pnam='text'" #
353							###########################################################
354							sub parse_rdf_simple_property {
355	2			2	0	3	my ($this, $ns, $pdir) = @_; my ($name, $n, $content, $v) = ();
	2					3
356							# try to match structure and return on failure; indeed, it
357							# is difficult to "match" a simple property, so, we try to
358							# exclude all other cases here ...
359	2	50				6	return 0 if $this->list_equal([['OPEN', '.'], ['OPEN', 'rdf:.']]);
360							# extract the opening tag with the property name
361	2	50				9	$this->list_extract(['OPEN', "($ns:.*)", \$name])
362							\|\| $this->die('simple property: error at opening tag');
363							# property qualifiers not yet supported yet!! (TODO)
364							# case I: the value is simply text
365	2	50				8	if ($this->list_extract(['CONTENT', '(.*)', \$content])) {
366	2					16	$this->store_xmp_value($pdir, $name, $content); }
367							# case II: the "value" is a sequence of properties
368							# this is to be clarified .... (TODO)
369	0					0	else { push @$pdir, $name;
370	0					0	$this->extract_attributes($pdir, '(.*)', 'ATTRIBUTE');
371	0					0	$this->store_xmp_value($pdir, 'CONTENT', $v)
372							while $this->list_extract(['CONTENT', '(.*)', \$v]);
373	0					0	$this->parse_rdf_simple_property($ns, $pdir)
374							while ! $this->list_equal(['CLOSE', "$name"]);
375	0					0	pop @$pdir; }
376							# closing tag
377	2	50				9	$this->list_extract(['CLOSE', "$name"])
378							\|\| $this->die('simple property: error at closing tag');
379	2					10	1 }
380
381							###########################################################
382							# Structured properties: agglomerates of properties of #
383							# different type. The inner properties are stored inside #
384							# a secondary rdf:Description tag, which also contains a #
385							# secondary namespace definition, to be used by inner #
386							# properties. I hope this is all. #
387							# ------------------------------------------------------- #
388							# [structuredP(NAME)] := #
389							#
390							# [property(N2)]+ #
391							# #
392							# #
393							###########################################################
394							sub parse_rdf_struct_property {
395	0			0	0		my ($this, $ns, $pdir) = @_; my ($name, $ns_2, $ns_2_v) = ();
	0
396							# try to match structure and return on failure
397	0	0					return 0 unless $this->list_extract
398							(['OPEN', "$ns:(.*)", \$name], ['OPEN', 'rdf:Description'],
399							['ATTRIBUTE', 'xmlns:(.)', \$ns_2, '(.)', \$ns_2_v]);
400							# store the property content
401	0						$this->store_xmp_value(['SCHEMAS'], $ns_2, $ns_2_v);
402							# get all embedded properties
403	0						$this->parse_rdf_property($ns_2, [@$pdir, $name])
404							while ! $this->list_equal(['CLOSE', $name]);
405							# find where tags are closing
406	0	0					$this->list_extract(['CLOSE', $name])
407							\|\| $this->die('structured property: error at closing tag');
408	0						1 }
409
410							###########################################################
411							# Array properties: rdf:Seq is for an ordered list of #
412							# properties, rdf:Bag for an unordered set of properties #
413							# and rdf:Alt for a list of alternatives. Items are most #
414							# often homogeneous, but this is not a rule. There is a #
415							# namespace problem for qualified items (TODO) #
416							# ------------------------------------------------------- #
417							# [arrayP(NAME)] := #
418							# #
419							# [item]+ #
420							# #
421							# #
422							# [item] := [simple_item] or [prop_item] or #
423							# [qualif_item(N2)] or [lang_item] #
424							# ------------------------------------------------------- #
425							# Note: a [lang_item] can be found only in an rdf:Alt, #
426							# and this rdf:Alt must in turn contain only [lang_item] #
427							# items, but this check is not yet implemented (TODO). #
428							###########################################################
429							sub parse_rdf_array_property {
430	0			0	0		my ($this, $ns, $pdir) = @_; my ($name, $type) = ();
	0
431							# try to match structure and return on failure
432	0	0					return 0 unless $this->list_extract
433							([['OPEN',"($ns:.*)",\$name], ['OPEN','(rdf:(Bag\|Seq\|Alt))',\$type]]);
434							# get all items in this array property
435	0						while (! $this->list_equal(['CLOSE', $type])) {
436	0	0					$this->parse_rdf_item ([@$pdir, $name]) && next;
437	0	0					$this->parse_rdf_item_lang ([@$pdir, $name]) && next;
438	0	0					$this->parse_rdf_item_property ([@$pdir, $name]) && next;
439	0	0					$this->parse_rdf_item_qualified([@$pdir, $name]) && next;
440	0						$this->die('parse_rdf_array_property: unhandled case'); }
441							# store the property type in the subdirectory
442	0						$this->search_record(@$pdir, $name)->{extra} = $type;
443							# find where tags are closing
444	0	0					$this->list_extract([['CLOSE', $type], ['CLOSE', "$name"]])
445							\|\| $this->die('array property: error at closing tag');
446	0						1 }
447
448							###########################################################
449							# Simple items: just text strings inside rdf:li tags. It #
450							# is the simplest case for rdf:Bag, rdf:Set and rdf:Alt #
451							# array properties. It does not need a subdirectory. #
452							# ------------------------------------------------------- #
453							# [simple_item] := text #
454							###########################################################
455							sub parse_rdf_item {
456	0			0	0		my ($this, $pdir) = @_; my ($content) = ();
	0
457							# try to match structure and return on failure
458	0	0					return 0 unless $this->list_extract
459							([['OPEN','rdf:li'],['CONTENT','(.*)',\$content],['CLOSE','rdf:li']]);
460							# store the property content
461	0						$this->store_xmp_value($pdir, 'ITEM', $content);
462	0						1 }
463
464							###########################################################
465							# Property items: these items contain another property #
466							# which is not simple text, e.g., a structured property #
467							# or an array property. Additional qualifiers can be spe- #
468							# cified as attributes of the rdf:li tag. Such properties #
469							# in general require their own subdirectories. #
470							# ------------------------------------------------------- #
471							# [prop_item] := [simplP(NAME)]
472							###########################################################
473							sub parse_rdf_item_property {
474	0			0	0		my ($this, $pdir) = @_; my ($name, $value) = ();
	0
475							# try to match structure and return on failure
476	0	0					return 0 unless $this->list_equal
477							([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'rdf:.', '.'], ['OPEN', '.*']]);
478	0						$this->list_extract([['OPEN', 'rdf:li'],
479							['ATTRIBUTE', '(rdf:.)', \$name, '(.)', \$value]]);
480							# store the property content
481	0						$this->store_xmp_value([@$pdir, 'ITEM'], $name, $value, 'QUALIFIER');
482							# this is plainly wrong: how to extract the correct namespace? TODO
483	0						$this->parse_rdf_property('stJob', [@$pdir, 'ITEM']);
484	0	0					$this->list_extract(['CLOSE', 'rdf:li'])
485							\|\| $this->die('item_property: error at closing tag');
486	0						1 }
487
488							###########################################################
489							# Qualified items: these items can be found inside an #
490							# array property ('Bag', 'Seq' or 'Alt') and differ from #
491							# standard items because they do not only have a value, #
492							# but also one or more "qualifiers"; they remain unnamed, #
493							# however. The namespace of the qualifiers can be diffe- #
494							# rent from the main namespace, but this is not yet taken #
495							# into account (TODO). #
496							# ------------------------------------------------------- #
497							# [qualif_item(N2)] := #
498							# #
499							# text #
500							# [qualifier(N2)]* #
501							# #
502							# #
503							# [qualifier(N2)] := text #
504							###########################################################
505							sub parse_rdf_item_qualified {
506	0			0	0		my ($this, $pdir) = @_; my ($name, $value) = ('qualified-ITEM');
	0
507							# try to match structure and return on failure
508	0	0					return 0 unless $this->list_extract
509							([['OPEN','rdf:li'], ['OPEN','rdf:Description'], ['OPEN','rdf:value'],
510							['CONTENT', '(.*)', \$value], ['CLOSE', 'rdf:value']]);
511							# store the qualified property value, then all qualifiers;
512							# we need a new subdirectory to store all this stuff
513	0						$this->store_xmp_value([@$pdir, $name], 'ITEM', $value);
514	0						1 while $this->parse_rdf_simple_property('.*', [@$pdir, $name]);
515							# find where tags are closing
516	0	0					$this->list_extract([['CLOSE', 'rdf:Description'], ['CLOSE', 'rdf:li']])
517							\|\| $this->die('item_qualified: error at closing tag');
518	0						1 }
519
520							###########################################################
521							# Language alternatives: these are items inside an 'Alt' #
522							# array properties. It should not be possible to mix #
523							# language alternatives and normal items, but this is not #
524							# currently checked (TODO ?) #
525							# ------------------------------------------------------- #
526							# [lang_item] := text #
527							###########################################################
528							sub parse_rdf_item_lang {
529	0			0	0		my ($this, $pdir) = @_; my ($language, $content) = ();
	0
530							# try to match structure and return on failure
531	0	0					return 0 unless $this->list_extract
532							([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'xml:lang', '(.*)', \$language],
533							['CONTENT', '(.*)', \$content], ['CLOSE', 'rdf:li']]);
534							# store the property content
535	0						$this->store_xmp_value($pdir, $language, $content, 'lang-alt');
536	0						1 }
537
538							# successful load
539							1;