File Coverage

blib/lib/RDF/RDFa/Parser.pm
Criterion Covered Total %
statement 9 11 81.8
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 13 15 86.6


line stmt bran cond sub pod time code
1             package RDF::RDFa::Parser;
2              
3             BEGIN {
4 9     9   306502 $RDF::RDFa::Parser::AUTHORITY = 'cpan:TOBYINK';
5 9         241 $RDF::RDFa::Parser::VERSION = '1.097';
6             }
7              
8 9     9   85 use Carp qw();
  9         18  
  9         137  
9 9     9   15595 use Data::UUID;
  9         11973  
  9         688  
10 9     9   6018 use File::ShareDir qw(dist_file);
  0            
  0            
11             use HTML::HTML5::Parser;
12             use HTML::HTML5::Sanity qw(fix_document);
13             use LWP::UserAgent;
14             use RDF::RDFa::Parser::Config;
15             use RDF::RDFa::Parser::InitialContext;
16             use RDF::RDFa::Parser::OpenDocumentObjectModel;
17             use RDF::Trine 0.130;
18             use Scalar::Util qw(blessed);
19             use Storable qw(dclone);
20             use URI::Escape;
21             use URI;
22             use XML::LibXML qw(:all);
23             use XML::RegExp;
24              
25             use constant {
26             ERR_WARNING => 'w',
27             ERR_ERROR => 'e',
28             };
29             use constant {
30             ERR_CODE_HOST => 'HOST01',
31             ERR_CODE_RDFXML_MUDDLE => 'RDFX01',
32             ERR_CODE_RDFXML_MESS => 'RDFX02',
33             ERR_CODE_PREFIX_BUILTIN => 'PRFX01',
34             ERR_CODE_PREFIX_ILLEGAL => 'PRFX02',
35             ERR_CODE_PREFIX_DISABLED => 'PRFX03',
36             ERR_CODE_INSTANCEOF_USED => 'INST01',
37             ERR_CODE_INSTANCEOF_OVERRULED => 'INST02',
38             ERR_CODE_CURIE_FELLTHROUGH => 'CURI01',
39             ERR_CODE_CURIE_UNDEFINED => 'CURI02',
40             ERR_CODE_BNODE_WRONGPLACE => 'BNOD01',
41             ERR_CODE_VOCAB_DISABLED => 'VOCA01',
42             ERR_CODE_LANG_INVALID => 'LANG01',
43             };
44             use constant {
45             RDF_XMLLIT => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral',
46             RDF_TYPE => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
47             RDF_FIRST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first',
48             RDF_REST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest',
49             RDF_NIL => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil',
50             };
51             use common::sense;
52             use 5.010;
53              
# True (1) when the optional XML::Atom::OWL module could be loaded, false (0)
# otherwise. consume() checks this flag before handing the document to
# XML::Atom::OWL.
our $HAS_AWOL;

BEGIN
{
	# Keep any error from the probe away from the caller's $@.
	local $@;

	# Block eval plus require avoids compiling a string at runtime, and
	# relying on the eval's return value (rather than the truthiness of $@)
	# is the robust way to detect failure.
	$HAS_AWOL = eval { require XML::Atom::OWL; XML::Atom::OWL->import; 1 } ? 1 : 0;
}
62              
# Constructor.
#
# Arguments:
#   $markup   - the document to parse: a string, or an XML::LibXML::Document.
#   $base_uri - absolute URI for the document (must start with a scheme).
#   $config   - optional; an RDF::RDFa::Parser::Config object, or a hashref
#               of options applied on top of the default configuration.
#   $store    - optional store for the model; a temporary in-memory store
#               is created when omitted.
#
# Dies on an unusable configuration or base URI, croaks when no markup is
# given. Failure to build a DOM is swallowed here: the 'dom' slot is left
# undefined and consume() deals with it.
sub new
{
	my ($class, $markup, $base_uri, $config, $store) = @_;

	# Configuration: accept a ready-made Config object untouched, build one
	# for undef or a plain hashref, and reject everything else.
	unless (blessed($config) && $config->isa('RDF::RDFa::Parser::Config'))
	{
		if (!defined $config)
		{
			$config = RDF::RDFa::Parser::Config->new;
		}
		elsif ('HASH' eq ref $config)
		{
			$config = RDF::RDFa::Parser::Config->new(undef, undef, %$config);
		}
		else
		{
			die "Unrecognised configuration\n";
		}
	}

	# Base URI: must begin with a URI scheme.
	die "Need a valid base URI.\n"
		unless $base_uri =~ /^[a-z][a-z0-9\+\-\.]*:/i;

	# Markup: required.
	Carp::croak("Need to provide markup to parse.") unless defined $markup;

	# Build the DOM. Parse errors are trapped; $dom then stays undefined.
	my $dom;
	eval {
		if (blessed($markup) && $markup->isa('XML::LibXML::Document'))
		{
			# Already a DOM: keep it and re-serialise for the string form.
			$dom    = $markup;
			$markup = $dom->toString;
		}
		else
		{
			my $flavour = $config->{'dom_parser'};

			if ($flavour =~ /^(opendocument|opendoc|odf|od|odt)$/i)
			{
				my $odf = RDF::RDFa::Parser::OpenDocumentObjectModel->new;
				$dom = $odf->parse_string($markup, $base_uri);
			}
			elsif ($flavour =~ /^(html|tagsoup|soup)$/i)
			{
				my $html = HTML::HTML5::Parser->new;
				$dom = fix_document( $html->parse_string($markup) );
			}
			else
			{
				# Plain XML, using the catalogue shipped with the
				# distribution when it is readable.
				my $xml = XML::LibXML->new;

				my $catalogue = dist_file('RDF-RDFa-Parser', 'catalogue/index.xml');
				$xml->load_catalog($catalogue)
					if -r $catalogue;
				$xml->validation(0);

				$dom = $xml->parse_string($markup);
			}
		}
	};

	# Store: default to a temporary in-memory store.
	$store //= RDF::Trine::Store::Memory->temporary_store;

	my $self = bless {
		baseuri  => $base_uri,
		origbase => $base_uri,
		dom      => $dom,
		model    => RDF::Trine::Model->new($store),
		bnodes   => 0,
		sub      => {},
		options  => $config,
		Graphs   => {},
		errors   => [],
		consumed => 0,
	}, $class;

	$config->auto_config($self);

	$self->{options} = $config = $config->guess_rdfa_version($self)
		if $config->{guess_rdfa_version};

	# HTML <base> element: the last one carrying @href wins, with any
	# fragment identifier removed.
	if ($dom and $self->{options}{xhtml_base})
	{
		my $href;
		foreach my $element ($self->dom->getElementsByTagName('base'))
		{
			next unless $element->hasAttribute('href');
			$href = $element->getAttribute('href');
			$href =~ s/#.*$//g;
		}
		$self->{baseuri} = $self->uri($href)
			if defined $href && length $href;
	}

	return $self;
}
164              
# Alternative constructor: build a parser from a URL or from an existing
# HTTP::Message object.
#
# Arguments:
#   $url    - the address to fetch, or an HTTP::Message holding the document.
#   $config - optional Config object or hashref of options.
#   $store  - optional store, passed through to new().
#
# The configuration is adapted to the content type of the response before
# new() is called.
sub new_from_url
{
	my ($class, $url, $config, $store) = @_;

	my $config_is_object = blessed($config) && $config->isa('RDF::RDFa::Parser::Config');
	my $config_is_hash   = !$config_is_object && ref $config eq 'HASH';

	# Obtain the response: either handed in directly, or fetched using the
	# user agent supplied by the configuration.
	my $response;
	if (blessed($url) && $url->isa('HTTP::Message'))
	{
		$response = $url;
	}
	else
	{
		my $ua
			= $config_is_object ? $config->lwp_ua
			: $config_is_hash   ? RDF::RDFa::Parser::Config->new('xml', undef, %$config)->lwp_ua
			:                     RDF::RDFa::Parser::Config->new('xml', undef)->lwp_ua;
		$response = $ua->get($url);
	}

	# Re-target the configuration at the media type that was served.
	my $host = $response->content_type;
	$config
		= $config_is_object ? $config->rehost($host)
		: $config_is_hash   ? RDF::RDFa::Parser::Config->new($host, undef, %$config)
		:                     RDF::RDFa::Parser::Config->new($host, undef);

	return $class->new(
		$response->decoded_content,
		($response->base || $url).'',
		$config,
		$store,
	);
}
203              
204             *new_from_uri = \&new_from_url;
205              
206             *new_from_response = \&new_from_url;
207              
# Return parsed data as an RDF::Trine::Model, consuming the document first.
#
# Without an argument the parser's own model is returned. With a graph
# identifier (a URI, or a blank node written as "_:label") a new temporary
# model is returned holding only the statements found in that graph.
sub graph
{
	my ($self, $graph) = @_;

	$self->consume;

	return $self->{model} unless defined $graph;

	# Turn the identifier into the node used as statement context.
	my $context = ($graph =~ m/^_:(.*)/)
		? RDF::Trine::Node::Blank->new($1)
		: RDF::Trine::Node::Resource->new($graph, $self->{baseuri});

	# Copy the matching statements into a fresh model.
	my $subset = RDF::Trine::Model->temporary_model;
	my $iter   = $self->{model}->get_statements(undef, undef, undef, $context);
	while (my $statement = $iter->next)
	{
		$subset->add_statement($statement);
	}
	return $subset;
}
239              
# Return the parser's output graph; equivalent to calling graph() with no
# graph identifier. Any further arguments are ignored.
sub output_graph
{
	my ($self) = @_;
	return $self->graph;
}
244              
# Return a hashref mapping each graph identifier recorded in
# $self->{Graphs} to the model produced by graph() for that identifier.
# The document is consumed first.
sub graphs
{
	my ($self) = @_;
	$self->consume;

	my %model_for = map { $_ => scalar $self->graph($_) } keys %{$self->{Graphs}};
	return \%model_for;
}
258              
# Query Open Graph Protocol data about the document.
#
# With a $property (a short name, or a full URI in either OGP namespace,
# or any other absolute URI) the values of that property are returned;
# in scalar context only the first value. Without a $property the list of
# property keys found is returned. Extra named options are accepted but
# not used here.
sub opengraph
{
	my ($self, $property, %opts) = @_;
	$self->consume;

	# Reduce a full OGP URI to its short name.
	if (defined $property)
	{
		$property = $1
			if $property =~ m'^http://opengraphprotocol\.org/schema/(.*)$';
		$property = $1
			if $property =~ m'^http://ogp\.me/ns#(.*)$';
	}

	# Decide which predicates to look up: a wildcard when no property was
	# requested, the given URI when it is absolute, otherwise the short
	# name in both OGP namespaces.
	my @predicates;
	if (!defined $property)
	{
		@predicates = (undef);
	}
	elsif ($property =~ /^[a-z][a-z0-9\-\.\+]*:/i)
	{
		@predicates = ( RDF::Trine::Node::Resource->new($property) );
	}
	else
	{
		@predicates = (
			RDF::Trine::Node::Resource->new('http://ogp.me/ns#'.$property),
			RDF::Trine::Node::Resource->new('http://opengraphprotocol.org/schema/'.$property),
		);
	}

	# Collect the object values, keyed by (shortened) predicate.
	my %found;
	foreach my $predicate (@predicates)
	{
		my $iter = $self->graph->get_statements(
			RDF::Trine::Node::Resource->new($self->uri), $predicate, undef);
		while (my $st = $iter->next)
		{
			my $key = $st->predicate->uri;
			$key = $1
				if $key =~ m'^http://ogp\.me/ns#(.*)$'
				|| $key =~ m'^http://opengraphprotocol\.org/schema/(.*)$';

			my $object = $st->object;
			if ($object->is_resource)
			{
				push @{ $found{$key} }, $object->uri;
			}
			elsif ($object->is_literal)
			{
				push @{ $found{$key} }, $object->literal_value;
			}
		}
	}

	my @return = defined $property
		? @{ $found{$property} || [] }
		: keys %found;

	return wantarray ? @return : $return[0];
}
329              
# Accessor for the DOM stored in the parser's 'dom' slot.
sub dom
{
	my ($self) = @_;
	return $self->{dom};
}
335              
# Resolve a (possibly relative) URI reference to an absolute URI string.
#
# Arguments:
#   $param - the reference to resolve; a false value is treated as ''.
#   $opts  - optional hashref. Keys read here: 'require-absolute' and
#            'xml_base'. An XML::LibXML object may be passed instead and
#            is wrapped as { element => $object }.
#
# A reference that already starts with a scheme is returned unchanged.
# Otherwise undef is returned when 'require-absolute' is set; failing that
# the reference is resolved with the configured uri_class against the
# parser's base URI (or the 'xml_base' option when xml_base handling is
# switched on and a value was supplied).
sub uri
{
	my ($self, $param, $opts) = @_;
	$param ||= '';
	$opts  ||= {};

	$opts = { 'element' => $opts }
		if (ref $opts) =~ /^XML::LibXML/;

	# seems to be an absolute URI, so can safely return "as is".
	return $param
		if $param =~ /^([a-z][a-z0-9\+\.\-]*)\:/i;

	return undef
		if $opts->{'require-absolute'};

	my $base = $self->{baseuri};
	$base = $opts->{'xml_base'} || $self->{baseuri}
		if $self->{'options'}->{'xml_base'};

	my $resolved = $self->{options}{uri_class}->new_abs($param, $base);
	return "$resolved";
}
367              
# Return the list of problems logged so far. Each entry is an arrayref as
# stored by _log_error. In scalar context the number of entries results.
sub errors
{
	my ($self) = @_;
	my $log = $self->{errors};
	return @$log;
}
373              
# Describe the logged errors and warnings as RDF.
#
# Arguments:
#   $model   - optional model to add to; a temporary one is created if
#              none (or a false value) is given.
#   $context - optional graph node; when true, quads in that graph are
#              added instead of plain triples.
#
# Returns the model.
sub processor_graph
{
	my ($self, $model, $context) = @_;
	$model ||= RDF::Trine::Model->new( RDF::Trine::Store->temporary_store );

	# Vocabularies.
	my $rdf    = RDF::Trine::Namespace->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
	my $rdfa   = RDF::Trine::Namespace->new('http://www.w3.org/ns/rdfa#');
	my $cnt    = RDF::Trine::Namespace->new('http://www.w3.org/2011/content#');
	my $ptr    = RDF::Trine::Namespace->new('http://www.w3.org/2009/pointers#');
	my $dc     = RDF::Trine::Namespace->new('http://purl.org/dc/terms/');
	my $err_ns = RDF::Trine::Namespace->new('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:error:');

	# Mint a new identifier in the error namespace from a fresh UUID.
	my $uuid      = Data::UUID->new;
	my $fresh_iri = sub
	{
		my $id = $uuid->create_str;
		return $err_ns->$id;
	};

	# Add one statement; anything that is not an object becomes a literal.
	my $emit = sub
	{
		my @nodes = map
			{ blessed($_) ? $_ : RDF::Trine::Node::Literal->new($_) }
			@_;
		$model->add_statement(
			$context
				? RDF::Trine::Statement::Quad->new(@nodes, $context)
				: RDF::Trine::Statement->new(@nodes)
		);
	};

	# Class in the RDFa vocabulary for each severity level.
	my %class_for_level = (
		ERR_WARNING() => 'Warning',
		ERR_ERROR()   => 'Error',
	);

	# Additional class for each error code; an empty string means none.
	my %class_for_code = (
		ERR_CODE_HOST()                 => 'DocumentError',
		ERR_CODE_RDFXML_MUDDLE()        => '',
		ERR_CODE_RDFXML_MESS()          => 'DocumentError',
		ERR_CODE_PREFIX_BUILTIN()       => 'DocumentError',
		ERR_CODE_PREFIX_ILLEGAL()       => 'DocumentError',
		ERR_CODE_PREFIX_DISABLED()      => '',
		ERR_CODE_INSTANCEOF_USED()      => '',
		ERR_CODE_INSTANCEOF_OVERRULED() => '',
		ERR_CODE_CURIE_FELLTHROUGH()    => '',
		ERR_CODE_CURIE_UNDEFINED()      => 'UnresolvedCURIE',
		ERR_CODE_BNODE_WRONGPLACE()     => '',
		ERR_CODE_VOCAB_DISABLED()       => '',
		ERR_CODE_LANG_INVALID()         => 'DocumentError',
	);

	foreach my $problem ($self->errors)
	{
		my $iri = $fresh_iri->();
		my ($level, $code, $message, $details) = @$problem;

		if (my $level_class = $class_for_level{$level})
		{
			$emit->($iri, $rdf->type, $rdfa->$level_class);
		}
		if (my $code_class = $class_for_code{$code})
		{
			$emit->($iri, $rdf->type, $rdfa->$code_class);
		}

		$emit->($iri, $dc->description, $message);

		# Point at the offending element when one was recorded.
		if (blessed($details->{element}) and $details->{element}->can('nodePath'))
		{
			my $pointer = $fresh_iri->();
			$emit->($iri, $rdfa->context, $pointer);
			$emit->($pointer, $rdf->type, $ptr->XPathPointer);
			$emit->($pointer, $ptr->expression, $details->{element}->nodePath);
		}
	}

	return $model;
}
459              
# Return a new model holding every statement from processor_graph()
# followed by every statement from graph().
sub processor_and_output_graph
{
	my ($self) = @_;
	my $combined = RDF::Trine::Model->new;

	foreach my $method (qw( processor_graph graph ))
	{
		my $source = $self->$method;
		$source->get_statements->each(sub {
			my ($statement) = @_;
			$combined->add_statement($statement);
		});
	}

	return $combined;
}
468              
# Record a problem encountered while parsing.
#
# Arguments:
#   $level   - severity, ERR_WARNING or ERR_ERROR.
#   $code    - one of the ERR_CODE_* strings (e.g. 'HOST01').
#   $message - human-readable description.
#   %args    - further details; 'uri' and 'element' are reported when
#              present.
#
# When an 'onerror' callback is registered in $self->{sub} it is invoked
# with the full argument list. Otherwise problems of level ERR_ERROR are
# reported through Carp::carp, and warnings are not reported at all.
# In every case the problem is appended to $self->{errors} as
# [$level, $code, $message, \%args].
sub _log_error
{
	my ($self, $level, $code, $message, %args) = @_;

	if (defined $self->{'sub'}->{'onerror'})
	{
		$self->{'sub'}->{'onerror'}(@_);
	}
	elsif ($level eq ERR_ERROR)
	{
		# Error codes are alphanumeric strings, so they are printed with
		# %s. A hexadecimal conversion would numify them to zero, print
		# "0000" for every code and raise a non-numeric warning.
		Carp::carp(sprintf("%s: %s\n", $code, $message));
		Carp::carp(sprintf("... with URI <%s>\n", $args{'uri'}))
			if defined $args{'uri'};
		Carp::carp(sprintf("... on element '%s' with path '%s'\n", $args{'element'}->localname, $args{'element'}->nodePath))
			if blessed($args{'element'}) && $args{'element'}->isa('XML::LibXML::Node');
	}

	push @{$self->{errors}}, [$level, $code, $message, \%args];
}
488              
# Process the document, adding the data found to the parser's model.
#
# Only the first call does any work; later calls are no-ops.
#
# Named arguments:
#   survive - when true, a document that could not be parsed into a DOM is
#             recorded through _log_error instead of raising an exception.
#
# Returns the parser object on every path, so calls can be chained.
# Croaks when there is no DOM and 'survive' is not set.
sub consume
{
	my ($self, %args) = @_;

	# Return the parser here too (not an empty list), so that the return
	# value is the same whether or not the work was already done.
	return $self if $self->{'consumed'};
	$self->{'consumed'}++;

	if (!$self->{dom})
	{
		if ($args{survive})
		{
			$self->_log_error(
				ERR_ERROR,
				ERR_CODE_HOST,
				'Input could not be parsed into a DOM!',
			);
		}
		else
		{
			Carp::croak("Input could not be parsed into a DOM!");
		}
		return $self;
	}

	# Named graph support: fill in any graph option left undefined.
	if ($self->{options}{graph})
	{
		$self->{options}{graph_attr} = 'graph'
			unless defined $self->{options}{graph_attr};
		$self->{options}{graph_type} = 'about'
			unless defined $self->{options}{graph_type};
		$self->{options}{graph_default} = $self->bnode
			unless defined $self->{options}{graph_default};
	}

	# Temporarily give XML::LibXML::Element two helper methods that pick
	# the namespaced or plain attribute call depending on whether a
	# namespace URI is defined. 'local' removes them again when this sub
	# exits.
	local *XML::LibXML::Element::getAttributeNsSafe = sub
	{
		my ($element, $nsuri, $attribute) = @_;
		return defined $nsuri ? $element->getAttributeNS($nsuri, $attribute) : $element->getAttribute($attribute);
	};
	local *XML::LibXML::Element::hasAttributeNsSafe = sub
	{
		my ($element, $nsuri, $attribute) = @_;
		return defined $nsuri ? $element->hasAttributeNS($nsuri, $attribute) : $element->hasAttribute($attribute);
	};

	# Walk the tree, starting at the root element with a fresh context.
	$self->_consume_element($self->dom->documentElement, { init => 1});

	# Optionally hand the same document to XML::Atom::OWL, sharing this
	# parser's model, callbacks and blank node generation.
	if ($self->{options}{atom_parser} && $HAS_AWOL)
	{
		my $awol = XML::Atom::OWL->new( $self->dom , $self->uri , undef, $self->{'model'} );
		$awol->{'bnode_generator'} = $self;
		$awol->set_callbacks( $self->{'sub'} );
		$awol->consume;
	}

	return $self;
}
546              
547             sub _consume_element
548             # http://www.w3.org/TR/rdfa-syntax/#sec_5.5.
549             {
550             my $self = shift;
551            
552             # Processing begins by applying the processing rules below to the document
553             # object, in the context of this initial [evaluation context]. All elements
554             # in the tree are also processed according to the rules described below,
555             # depth-first, although the [evaluation context] used for each set of rules
556             # will be based on previous rules that may have been applied.
557             my $current_element = shift;
558              
559             # shouldn't happen, but return 0 if it does.
560             return 0 unless $current_element->nodeType == XML_ELEMENT_NODE;
561            
562             # The evaluation context.
563             my $args = shift;
564             my ($base, $parent_subject, $parent_subject_elem, $parent_object, $parent_object_elem,
565             $list_mappings, $uri_mappings, $term_mappings, $incomplete_triples, $language,
566             $graph, $graph_elem, $xml_base);
567            
568             if ($args->{'init'})
569             {
570             my $init = RDF::RDFa::Parser::InitialContext->new(
571             $self->{options}{initial_context},
572             );
573             # At the beginning of processing, an initial [evaluation context] is created
574             $base = $self->uri;
575             $parent_subject = $base;
576             $parent_subject_elem = $self->dom->documentElement;
577             $parent_object = undef;
578             $parent_object_elem = undef;
579             $uri_mappings = +{ insensitive => $init->uri_mappings };
580             $term_mappings = +{ insensitive => $init->term_mappings };
581             $incomplete_triples = [];
582             $list_mappings = {};
583             $language = undef;
584             $graph = $self->{options}{graph} ? $self->{options}{graph_default} : undef;
585             $graph_elem = undef;
586             $xml_base = undef;
587            
588             if ($self->{options}{vocab_default})
589             {
590             $uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default};
591             }
592            
593             if ($self->{options}{prefix_default})
594             {
595             $uri_mappings->{'(DEFAULT PREFIX)'} = $self->{options}{prefix_default};
596             }
597             }
598             else
599             {
600             $base = $args->{'base'};
601             $parent_subject = $args->{'parent_subject'};
602             $parent_subject_elem = $args->{'parent_subject_elem'};
603             $parent_object = $args->{'parent_object'};
604             $parent_object_elem = $args->{'parent_object_elem'};
605             $uri_mappings = dclone($args->{'uri_mappings'});
606             $term_mappings = dclone($args->{'term_mappings'});
607             $incomplete_triples = $args->{'incomplete_triples'};
608             $list_mappings = $args->{'list_mappings'};
609             $language = $args->{'language'};
610             $graph = $args->{'graph'};
611             $graph_elem = $args->{'graph_elem'};
612             $xml_base = $args->{'xml_base'};
613             }
614              
615             # Used by OpenDocument, otherwise usually undef.
616             my $rdfans = $self->{options}{ns} || undef;
617              
618             # First, the local values are initialized
619             my $recurse = 1;
620             my $skip_element = 0;
621             my $new_subject = undef;
622             my $new_subject_elem = undef;
623             my $current_object_resource = undef;
624             my $current_object_resource_elem = undef;
625             my $typed_resource = undef;
626             my $typed_resource_elem = undef;
627             my $local_uri_mappings = $uri_mappings;
628             my $local_term_mappings = $term_mappings;
629             my $local_incomplete_triples = [];
630             my $current_language = $language;
631            
632             my $activity = 0;
633              
634             # MOVED THIS SLIGHTLY EARLIER IN THE PROCESSING so that it can apply
635             # to RDF/XML chunks.
636             #
637             # The [current element] is also parsed for any language information, and
638             # if present, [current language] is set accordingly.
639             # Language information can be provided using the general-purpose XML
640             # attribute @xml:lang .
641             if ($self->{options}{xhtml_lang}
642             && $current_element->hasAttribute('lang'))
643             {
644             if ($self->_valid_lang( $current_element->getAttribute('lang') ))
645             {
646             $current_language = $current_element->getAttribute('lang');
647             }
648             else
649             {
650             $self->_log_error(
651             ERR_WARNING,
652             ERR_CODE_LANG_INVALID,
653             sprintf('Language code "%s" is not valid.', $current_element->getAtrribute('lang')),
654             element => $current_element,
655             lang => $current_element->getAttribute('lang'),
656             ) if $@;
657             }
658             }
659             if ($self->{options}{xml_lang}
660             && $current_element->hasAttributeNsSafe(XML_XML_NS, 'lang'))
661             {
662             if ($self->_valid_lang( $current_element->getAttributeNsSafe(XML_XML_NS, 'lang') ))
663             {
664             $current_language = $current_element->getAttributeNsSafe(XML_XML_NS, 'lang');
665             }
666             else
667             {
668             $self->_log_error(
669             ERR_WARNING,
670             ERR_CODE_LANG_INVALID,
671             sprintf('Language code "%s" is not valid.', $current_element->getAttributeNsSafe(XML_XML_NS, 'lang')),
672             element => $current_element,
673             lang => $current_element->getAttributeNsSafe(XML_XML_NS, 'lang'),
674             ) if $@;
675             }
676             }
677              
678             # EXTENSION
679             # xml:base - important for RDF/XML extension
680             if ($current_element->hasAttributeNsSafe(XML_XML_NS, 'base'))
681             {
682             my $old_base = $xml_base;
683             $xml_base = $current_element->getAttributeNsSafe(XML_XML_NS, 'base');
684             $xml_base =~ s/#.*$//g;
685             $xml_base = $self->uri($xml_base,
686             {'element'=>$current_element,'xml_base'=>$old_base});
687             }
688             my $hrefsrc_base = $base;
689             if ($self->{options}{xml_base}==2 && defined $xml_base)
690             {
691             $hrefsrc_base = $xml_base;
692             }
693              
694             # EXTENSION
695             # Parses embedded RDF/XML - mostly useful for non-XHTML documents, e.g. SVG.
696             if ($self->{options}{embedded_rdfxml}
697             && $current_element->localname eq 'RDF'
698             && $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
699             {
700             return 1 if $self->{options}{embedded_rdfxml}==2;
701              
702             my $g = $graph;
703             unless ($self->{options}{embedded_rdfxml} == 3)
704             {
705             $g = $self->bnode;
706             }
707            
708             my $fake_lang = 0;
709             unless ($current_element->hasAttributeNsSafe(XML_XML_NS, 'lang'))
710             {
711             $current_element->setAttributeNS(XML_XML_NS, 'lang', $current_language);
712             $fake_lang = 1;
713             }
714            
715             my $rdfxml_base = $self->{'origbase'};
716             $rdfxml_base = $base
717             if $self->{options}{xhtml_base}==2;
718             $rdfxml_base = $xml_base
719             if defined $xml_base;
720            
721             eval {
722             my $_map;
723             my $bnode_mapper = sub {
724             my $orig = shift;
725             $_map->{$orig} = $self->bnode
726             unless defined $_map->{$orig};
727             return $_map->{$orig};
728             };
729             my $parser = RDF::Trine::Parser->new('rdfxml');
730             my $r = $parser->parse(
731             $rdfxml_base,
732             $current_element->toStringEC14N,
733             sub {
734             my $st = shift;
735             my ($s, $p, @o);
736            
737             $s = $st->subject->is_blank ?
738             $bnode_mapper->($st->subject->blank_identifier) :
739             $st->subject->uri_value ;
740             $p = $st->predicate->uri_value ;
741             if ($st->object->is_literal)
742             {
743             @o = (
744             $st->object->literal_value,
745             $st->object->literal_datatype,
746             $st->object->literal_value_language,
747             );
748             $self->_insert_triple_literal({current=>$current_element},
749             $s, $p, @o,
750             ($self->{options}{graph} ? $g : undef));
751             }
752             else
753             {
754             push @o, $st->object->is_blank ?
755             $bnode_mapper->($st->object->blank_identifier) :
756             $st->object->uri_value;
757             $self->_insert_triple_resource({current=>$current_element},
758             $s, $p, @o,
759             ($self->{options}{graph} ? $g : undef));
760             }
761             });
762             };
763            
764             $self->_log_error(
765             ERR_ERROR,
766             ERR_CODE_RDFXML_MESS,
767             "Could not parse embedded RDF/XML content: ${@}",
768             element => $current_element,
769             ) if $@;
770            
771             $current_element->removeAttributeNS(XML_XML_NS, 'lang')
772             if ($fake_lang);
773            
774             return 1;
775             }
776             elsif ($current_element->localname eq 'RDF'
777             and $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
778             {
779             $self->_log_error(
780             ERR_WARNING,
781             ERR_CODE_RDFXML_MUDDLE,
782             'Encountered embedded RDF/XML content, but not configured to parse or skip it.',
783             element => $current_element,
784             );
785             }
786            
787             # Next the [current element] is parsed for [URI mapping]s and these are
788             # added to the [local list of URI mappings]. Note that a [URI mapping]
789             # will simply overwrite any current mapping in the list that has the same
790             # name
791             #
792             # Mappings are provided by @xmlns. The value to be mapped is set by
793             # the XML namespace prefix, and the value to map is the value of the
794             # attribute - a URI. Note that the URI is not processed in any way;
795             # in particular if it is a relative path it is not resolved against
796             # the current [base]. Authors are advised to follow best practice
797             # for using namespaces, which includes not using relative paths.
798             if ($self->{'options'}->{'xmlns_attr'})
799             {
800             foreach my $A ($current_element->getAttributes)
801             {
802             my $attr = $A->getName;
803            
804             if ($attr =~ /^xmlns\:(.+)$/i)
805             {
806             my $pfx = $self->{'options'}->{'prefix_nocase_xmlns'} ? (lc $1) : $1;
807             my $cls = $self->{'options'}->{'prefix_nocase_xmlns'} ? 'insensitive' : 'sensitive';
808             my $uri = $A->getValue;
809            
810             if ($pfx =~ /^(xml|xmlns|_)$/i)
811             {
812             $self->_log_error(
813             ERR_ERROR,
814             ERR_CODE_PREFIX_BUILTIN,
815             "Attempt to redefine built-in CURIE prefix '$pfx' not allowed.",
816             element => $current_element,
817             prefix => $pfx,
818             uri => $uri,
819             );
820             }
821             elsif ($pfx !~ /^($XML::RegExp::NCName)$/)
822             {
823             $self->_log_error(
824             ERR_ERROR,
825             ERR_CODE_PREFIX_ILLEGAL,
826             "Attempt to define non-NCName CURIE prefix '$pfx' not allowed.",
827             element => $current_element,
828             prefix => $pfx,
829             uri => $uri,
830             );
831             }
832             elsif ($uri eq XML_XML_NS || $uri eq XML_XMLNS_NS)
833             {
834             $self->_log_error(
835             ERR_ERROR,
836             ERR_CODE_PREFIX_BUILTIN,
837             "Attempt to define any CURIE prefix for '$uri' not allowed using \@xmlns.",
838             element => $current_element,
839             prefix => $pfx,
840             uri => $uri,
841             );
842             }
843             else
844             {
845             $self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls)
846             if defined $self->{'sub'}->{'onprefix'};
847            
848             $local_uri_mappings->{$cls}->{$pfx} = $uri;
849             }
850             }
851             }
852             }
853            
854             # RDFa 1.1 - @prefix support.
855             # Note that this overwrites @xmlns:foo.
856             if ($self->{'options'}->{'prefix_attr'}
857             && $current_element->hasAttributeNsSafe($rdfans, 'prefix'))
858             {
859             my $pfx_attr = $current_element->getAttributeNsSafe($rdfans, 'prefix') . ' ';
860             my @bits = split /[\s\r\n]+/, $pfx_attr;
861             while (@bits)
862             {
863             my ($bit1, $bit2, @rest) = @bits;
864             @bits = @rest;
865             $bit1 =~ s/:$//;
866            
867             my $pfx = $self->{'options'}->{'prefix_nocase_attr'} ? (lc $bit1) : $bit1;
868             my $cls = $self->{'options'}->{'prefix_nocase_attr'} ? 'insensitive' : 'sensitive';
869             my $uri = $bit2;
870            
871             unless ($pfx =~ /^$XML::RegExp::NCName$/)
872             {
873             $self->_log_error(
874             ERR_ERROR,
875             ERR_CODE_PREFIX_ILLEGAL,
876             "Attempt to define non-NCName CURIE prefix '$pfx' not allowed.",
877             element => $current_element,
878             prefix => $pfx,
879             uri => $uri,
880             );
881             next;
882             }
883            
884             $self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls)
885             if defined $self->{'sub'}->{'onprefix'};
886             $local_uri_mappings->{$cls}->{$pfx} = $uri;
887             }
888             }
889             elsif ($current_element->hasAttributeNsSafe($rdfans, 'prefix'))
890             {
891             $self->_log_error(
892             ERR_WARNING,
893             ERR_CODE_PREFIX_DISABLED,
894             "\@prefix found, but support disabled.",
895             element => $current_element,
896             );
897             }
898            
899             # RDFa 1.1 - @vocab support
900             if ($self->{options}{vocab_attr}
901             && $current_element->hasAttributeNsSafe($rdfans, 'vocab'))
902             {
903             if ($current_element->getAttributeNsSafe($rdfans, 'vocab') eq '')
904             {
905             $local_uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default};
906             }
907             else
908             {
909             $local_uri_mappings->{'(VOCAB)'} = $self->uri(
910             $current_element->getAttributeNsSafe($rdfans, 'vocab'),
911             {'element'=>$current_element,'xml_base'=>$xml_base});
912             }
913             }
914             elsif ($current_element->hasAttributeNsSafe($rdfans, 'vocab'))
915             {
916             $self->_log_error(
917             ERR_WARNING,
918             ERR_CODE_VOCAB_DISABLED,
919             "\@vocab found, but support disabled.",
920             element => $current_element,
921             uri => $self->uri(
922             $current_element->getAttributeNsSafe($rdfans, 'vocab'),
923             {'element'=>$current_element,'xml_base'=>$xml_base}),
924             );
925             }
926            
927             # EXTENSION
928             # KjetilK's named graphs.
929             if ($self->{'options'}->{'graph'})
930             {
931             my ($xmlns, $attr) = ($self->{'options'}->{'graph_attr'} =~ /^(?:\{(.+)\})?(.+)$/);
932             unless ($attr)
933             {
934             $xmlns = $rdfans;
935             $attr = 'graph';
936             }
937            
938             if ($self->{'options'}->{'graph_type'} eq 'id'
939             && $current_element->hasAttributeNsSafe($xmlns, $attr))
940             {
941             $graph = $self->uri('#' . $current_element->getAttributeNsSafe($xmlns, $attr),
942             {'element'=>$current_element,'xml_base'=>$hrefsrc_base});
943             }
944             elsif ($self->{'options'}->{'graph_type'} eq 'about'
945             && $current_element->hasAttributeNsSafe($xmlns, $attr))
946             {
947             $graph = $self->_expand_curie(
948             $current_element->getAttributeNsSafe($xmlns, $attr),
949             element => $current_element,
950             attribute => 'graph',
951             prefixes => $local_uri_mappings,
952             terms => $local_term_mappings,
953             xml_base => $xml_base,
954             );
955             $graph = $self->{'options'}->{'graph_default'}
956             unless defined $graph;
957             }
958             }
959              
960             if ($self->{options}{vocab_triple}
961             and $self->{options}{vocab_attr}
962             and $current_element->hasAttributeNsSafe($rdfans, 'vocab')
963             and defined $local_uri_mappings->{'(VOCAB)'})
964             {
965             $self->_insert_triple_resource({
966             current => $current_element,
967             subject => $current_element->ownerDocument->documentElement,
968             predicate => $current_element,
969             object => $current_element,
970             graph => $graph_elem,
971             },
972             $base,
973             'http://www.w3.org/ns/rdfa#usesVocabulary',
974             $local_uri_mappings->{'(VOCAB)'},
975             $graph);
976             }
977            
978             # EXTENSION: @role
979             if ($self->{'options'}->{'role_attr'}
980             && $current_element->hasAttributeNsSafe($rdfans, 'role'))
981             {
982             my @role = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'role') );
983             my @ROLE = map {
984             my $x = $self->_expand_curie(
985             $_,
986             element => $current_element,
987             attribute => 'role',
988             prefixes => $local_uri_mappings,
989             terms => $local_term_mappings,
990             xml_base => $xml_base,
991             );
992             defined $x ? ($x) : ();
993             } @role;
994             if (@ROLE)
995             {
996             if ($current_element->hasAttribute('id')
997             and !defined $self->{element_subjects}->{$current_element->nodePath})
998             {
999             $self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s',
1000             $current_element->getAttribute('id')),
1001             {'element'=>$current_element,'xml_base'=>$hrefsrc_base});
1002             }
1003             elsif (!defined $self->{element_subjects}->{$current_element->nodePath})
1004             {
1005             $self->{element_subjects}->{$current_element->nodePath} = $self->bnode;
1006             }
1007              
1008             foreach my $r (@ROLE)
1009             {
1010             my $E = {
1011             current => $current_element,
1012             subject => $current_element,
1013             predicate => $current_element,
1014             object => $current_element,
1015             graph => $graph_elem,
1016             };
1017             $self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#role', $r, $graph);
1018             }
1019             }
1020             }
1021            
1022             # EXTENSION: @cite
1023             if ($self->{'options'}->{'cite_attr'}
1024             && $current_element->hasAttributeNsSafe($rdfans, 'cite'))
1025             {
1026             my $citation = $self->uri(
1027             $current_element->getAttributeNsSafe($rdfans, 'cite'),
1028             {'element'=>$current_element,'xml_base'=>$hrefsrc_base}
1029             );
1030             if (defined $citation)
1031             {
1032             if ($current_element->hasAttribute('id')
1033             and !defined $self->{element_subjects}->{$current_element->nodePath})
1034             {
1035             $self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s',
1036             $current_element->getAttribute('id')),
1037             {'element'=>$current_element,'xml_base'=>$hrefsrc_base});
1038             }
1039             elsif (!defined $self->{element_subjects}->{$current_element->nodePath})
1040             {
1041             $self->{element_subjects}->{$current_element->nodePath} = $self->bnode;
1042             }
1043            
1044             my $E = {
1045             current => $current_element,
1046             subject => $current_element,
1047             predicate => $current_element,
1048             object => $current_element,
1049             graph => $graph_elem,
1050             };
1051             $self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#cite', $citation, $graph);
1052             }
1053             }
1054            
1055             my @rel = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rel') );
1056             my @rev = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rev') );
1057              
1058             # EXTENSION: rel="alternate stylesheet"
1059             if ($self->{options}{alt_stylesheet}
1060             && (grep /^alternate$/i, @rel)
1061             && (grep /^stylesheet$/i, @rel))
1062             {
1063             @rel = grep !/^(alternate|stylesheet)$/i, @rel;
1064             push @rel, ':ALTERNATE-STYLESHEET';
1065             }
1066            
1067             my @REL = map {
1068             my $x = $self->_expand_curie(
1069             $_,
1070             element => $current_element,
1071             attribute => 'rel',
1072             prefixes => $local_uri_mappings,
1073             terms => $local_term_mappings,
1074             xml_base => $xml_base,
1075             );
1076             defined $x ? ($x) : ();
1077             } @rel;
1078             my @REV = map {
1079             my $x = $self->_expand_curie(
1080             $_,
1081             element => $current_element,
1082             attribute => 'rev',
1083             prefixes => $local_uri_mappings,
1084             terms => $local_term_mappings,
1085             xml_base => $xml_base,
1086             );
1087             defined $x ? ($x) : ();
1088             } @rev;
1089              
# Subject rule: @about.
#
# Takes no arguments. If the current element carries an @about attribute,
# its value is handed to _expand_curie() together with the prefix and term
# mappings in force for this element, and the closure returns the pair
# ($resource, $element_that_supplied_it). If there is no @about, it returns
# an empty list.
# NOTE(review): $resource is presumably undef when the value cannot be
# expanded -- confirm against _expand_curie().
my $NEW_SUBJECT_ATTR_ABOUT = sub
{
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'about');

    my $resource = $self->_expand_curie(
        $current_element->getAttributeNsSafe($rdfans, 'about'),
        element   => $current_element,
        attribute => 'about',
        prefixes  => $local_uri_mappings,
        terms     => $local_term_mappings,
        xml_base  => $xml_base,
    );

    return ($resource, $current_element);
};
1107            
# Subject rule: @src.
#
# Takes no arguments. If the current element carries a @src attribute, the
# value is turned into a URI via $self->uri(), using $hrefsrc_base as the
# xml_base, and the closure returns ($resource, $current_element).
# Without @src it returns an empty list.
my $NEW_SUBJECT_ATTR_SRC = sub
{
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'src');

    my $uri_context = {
        'element'  => $current_element,
        'xml_base' => $hrefsrc_base,
    };
    my $resource = $self->uri(
        $current_element->getAttributeNsSafe($rdfans, 'src'),
        $uri_context
    );

    return ($resource, $current_element);
};
1121            
# Subject rule: element-specific defaults.
#
# Takes no arguments and returns ($resource, $element), or an empty list
# when none of the defaults applies. Rules are tried in this order:
#
#   1. the document's root element;
#   2. XHTML <head>/<body>, when the xhtml_elements option is on;
#   3. Atom <feed>/<entry>, when the atom_elements option is on.
#
# Elements are identified by namespace URI plus *local* name. tagName is
# the qualified name, so comparing it against 'head', 'body', 'feed' or
# 'entry' would miss namespace-prefixed elements such as <atom:entry>,
# even though they are the same elements.
my $NEW_SUBJECT_DEFAULTS = sub
{
    # Root element: act as if an empty @about were present.
    if ($current_element == $current_element->ownerDocument->documentElement)
    {
        return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element);
    }

    # Elements outside any namespace report an undefined namespaceURI;
    # normalise so the string comparisons below stay warning-free.
    my $ns = $current_element->namespaceURI;
    $ns = '' unless defined $ns;
    my $local = $current_element->localname;
    $local = '' unless defined $local;

    # if the element is the head or body element then act as if
    # there is an empty @about present, and process it according to
    # the rule for @about, above;
    if ($self->{options}{xhtml_elements}
    && ($ns eq 'http://www.w3.org/1999/xhtml')
    && ($local eq 'head' || $local eq 'body'))
    {
        # xhtml_elements == 2 selects the alternative behaviour where
        # head/body simply inherit the parent object.
        return ($parent_object, $parent_object_elem)
            if $self->{options}{xhtml_elements}==2;
        return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element);
    }

    # EXTENSION: atom elements
    if ($self->{options}{atom_elements}
    && ($ns eq 'http://www.w3.org/2005/Atom')
    && ($local eq 'feed' || $local eq 'entry'))
    {
        return ($self->_atom_magic($current_element), $current_element);
    }

    return;
};
1151            
# Subject rule: inherit from the parent.
#
# Accepts one optional boolean argument. When it is true and the current
# element has no @property attribute, the enclosing scope's $skip_element
# flag is set to 1. The flag is set whether or not a parent object exists.
#
# Returns ($parent_object, $parent_object_elem) if $parent_object is true,
# otherwise an empty list.
my $NEW_SUBJECT_INHERIT = sub
{
    my ($may_skip) = @_;

    if ($may_skip
    and !$current_element->hasAttributeNsSafe($rdfans, 'property'))
    {
        $skip_element = 1;
    }

    return $parent_object
        ? ($parent_object, $parent_object_elem)
        : ();
};
1161            
# Resource rule: @resource.
#
# Takes no arguments. If the current element carries a @resource attribute,
# its value is expanded with _expand_curie() using the prefix and term
# mappings in force for this element, and the closure returns
# ($resource, $current_element). Without @resource it returns an empty list.
my $NEW_SUBJECT_ATTR_RESOURCE = sub
{
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'resource');

    my $resource = $self->_expand_curie(
        $current_element->getAttributeNsSafe($rdfans, 'resource'),
        element   => $current_element,
        attribute => 'resource',
        prefixes  => $local_uri_mappings,
        terms     => $local_term_mappings,
        xml_base  => $xml_base,
    );

    return ($resource, $current_element);
};
1178              
# Resource rule: @href.
#
# Takes no arguments. If the current element carries an @href attribute,
# the value is turned into a URI via $self->uri(), using $hrefsrc_base as
# the xml_base, and the closure returns ($resource, $current_element).
# Without @href it returns an empty list.
my $NEW_SUBJECT_ATTR_HREF = sub
{
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'href');

    my $uri_context = {
        'element'  => $current_element,
        'xml_base' => $hrefsrc_base,
    };
    my $resource = $self->uri(
        $current_element->getAttributeNsSafe($rdfans, 'href'),
        $uri_context
    );

    return ($resource, $current_element);
};
1191              
# Subject rule: @typeof (or the deprecated @instanceof).
#
# Takes no arguments. If the current element has either attribute, the
# closure returns ($self->bnode($current_element), $current_element).
# If only @instanceof is present, an ERR_CODE_INSTANCEOF_USED warning is
# logged first. With neither attribute it returns an empty list.
my $NEW_SUBJECT_ATTR_TYPEOF = sub
{
    unless ($current_element->hasAttributeNsSafe($rdfans, 'typeof'))
    {
        # Neither spelling present: this rule does not apply.
        return
            unless $current_element->hasAttributeNsSafe($rdfans, 'instanceof');

        # Only the deprecated spelling is present.
        $self->_log_error(
            ERR_WARNING,
            ERR_CODE_INSTANCEOF_USED,
            "Deprecated \@instanceof found; using it anyway.",
            element => $current_element,
        );
    }

    return ($self->bnode($current_element), $current_element);
};
1212              
1213             # If the current element contains no @rel or @rev attribute, then the
1214             # next step is to establish a value for new subject. This step has two
1215             # possible alternatives.
1216             #
1217             # If the current element contains the @property attribute, but does not
1218             # contain either the @content or @datatype attributes, then
1219             #
1220             if (!$current_element->hasAttributeNsSafe($rdfans, 'rel')
1221             and !$current_element->hasAttributeNsSafe($rdfans, 'rev')
1222             and $current_element->hasAttributeNsSafe($rdfans, 'property')
1223             and !$current_element->hasAttributeNsSafe($rdfans, 'datatype')
1224             and !$current_element->hasAttributeNsSafe($rdfans, 'content')
1225             and $self->{options}{property_resources})
1226             {
1227             # new subject is set to the resource obtained from the first match
1228             # from the following rule:
1229             #
1230             # - by using the resource from @about, if present, obtained according
1231             # to the section on CURIE and IRI Processing;
1232             # - otherwise, if the element is the root element of the document, then
1233             # act as if there is an empty @about present, and process it according
1234             # to the rule for @about, above;
1235             # - otherwise, if parent object is present, new subject is set to the
1236             # value of parent object.
1237             #
1238             # TOBYINK: we add @src to that for RDFa 1.0/1.1 mish-mashes.
1239             #
1240             foreach my $code (
1241             $NEW_SUBJECT_ATTR_ABOUT,
1242             ($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object},
1243             $NEW_SUBJECT_DEFAULTS,
1244             $NEW_SUBJECT_INHERIT,
1245             ) {
1246             ($new_subject, $new_subject_elem) = $code->() unless $new_subject;
1247             }
1248            
1249             # If @typeof is present then typed resource is set to the resource
1250             # obtained from the first match from the following rules:
1251             #
1252             if ($current_element->hasAttributeNsSafe($rdfans, 'typeof')
1253             or $current_element->hasAttributeNsSafe($rdfans, 'instanceof'))
1254             {
1255             # - by using the resource from @about, if present, obtained
1256             # according to the section on CURIE and IRI Processing;
1257             # - otherwise, if the element is the root element of the
1258             # document, then act as if there is an empty @about present
1259             # and process it according to the previous rule;
1260             #
1261             foreach my $code (
1262             $NEW_SUBJECT_ATTR_ABOUT,
1263             ($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object},
1264             $NEW_SUBJECT_DEFAULTS,
1265             ) {
1266             ($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource;
1267             }
1268            
1269             # - otherwise,
1270             unless ($typed_resource)
1271             {
1272             # + by using the resource from @resource, if present,
1273             # obtained according to the section on CURIE and IRI
1274             # Processing;
1275             # + otherwise, by using the IRI from @href, if present,
1276             # obtained according to the section on CURIE and IRI
1277             # Processing;
1278             # + otherwise, by using the IRI from @src, if present,
1279             # obtained according to the section on CURIE and IRI
1280             # Processing;
1281             #
1282             foreach my $code (
1283             $NEW_SUBJECT_ATTR_RESOURCE,
1284             $NEW_SUBJECT_ATTR_HREF,
1285             ($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object},
1286             ) {
1287             ($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource;
1288             }
1289            
1290             # + otherwise, the value of typed resource is set to a
1291             # newly created bnode.
1292             #
1293             unless ($typed_resource)
1294             {
1295             ($typed_resource, $typed_resource_elem) =
1296             ($self->bnode($current_element), $current_element);
1297             }
1298            
1299             # + The value of the current object resource is then set
1300             # to the value of typed resource.
1301             #
1302             ($current_object_resource, $current_object_resource_elem) =
1303             ($typed_resource, $typed_resource_elem);
1304             }
1305             }
1306             }
1307            
1308             # otherwise
1309             elsif (!$current_element->hasAttributeNsSafe($rdfans, 'rel')
1310             and !$current_element->hasAttributeNsSafe($rdfans, 'rev'))
1311             {
1312             # - If the element contains an @about, @href, @src, or @resource
1313             # attribute, new subject is set to the resource obtained as
1314             # follows:
1315             # + by using the resource from @about, if present, obtained
1316             # according to the section on CURIE and IRI Processing;
1317             # + otherwise, by using the resource from @resource, if
1318             # present, obtained according to the section on CURIE and
1319             # IRI Processing;
1320             # + otherwise, by using the IRI from @href, if present,
1321             # obtained according to the section on CURIE and IRI
1322             # Processing;
1323             # + otherwise, by using the IRI from @src, if present,
1324             # obtained according to the section on CURIE and IRI
1325             # Processing.
1326             # - otherwise, if no resource is provided by a resource
1327             # attribute, then the first match from the following rules
1328             # will apply:
1329             # + if the element is the root element of the document,
1330             # then act as if there is an empty @about present, and
1331             # process it according to the rule for @about, above;
1332             # + otherwise, if @typeof is present, then new subject is
1333             # set to be a newly created bnode;
1334             # + otherwise, if parent object is present, new subject is
1335             # set to the value of parent object. Additionally, if
1336             # @property is not present then the skip element flag is
1337             # set to 'true'.
1338             #
1339             my $i;
1340             foreach my $code (
1341             $NEW_SUBJECT_ATTR_ABOUT,
1342             ($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object},
1343             $NEW_SUBJECT_ATTR_RESOURCE,
1344             $NEW_SUBJECT_ATTR_HREF,
1345             ($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object},
1346             $NEW_SUBJECT_DEFAULTS,
1347             $NEW_SUBJECT_ATTR_TYPEOF,
1348             sub { $NEW_SUBJECT_INHERIT->(1) },
1349             ) {
1350             last if $new_subject;
1351             ($new_subject, $new_subject_elem) = $code->();
1352             }
1353              
1354             # if ($current_element->{'x-foo'})
1355             # {
1356             # use Data::Dumper;
1357             # print Dumper \%args;
1358             # }
1359            
1360             # - Finally, if @typeof is present, set the typed resource
1361             # to the value of new subject.
1362             #
1363             if ($current_element->hasAttributeNsSafe($rdfans, 'typeof')
1364             or $current_element->hasAttributeNsSafe($rdfans, 'instanceof'))
1365             {
1366             ($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem);
1367             }
1368             }
1369            
1370             # If the [current element] does contain a valid @rel or @rev URI, obtained
1371             # according to the section on CURIE and URI Processing, then the next step
1372             # is to establish both a value for [new subject] and a value for [current
1373             # object resource]:
1374             else
1375             {
1376             foreach my $code (
1377             $NEW_SUBJECT_ATTR_ABOUT,
1378             ($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object},
1379             ($NEW_SUBJECT_ATTR_TYPEOF) x!$self->{options}{typeof_resources},
1380             $NEW_SUBJECT_DEFAULTS,
1381             $NEW_SUBJECT_INHERIT,
1382             ) {
1383             ($new_subject, $new_subject_elem) = $code->() unless $new_subject;
1384             }
1385              
1386             foreach my $code (
1387             $NEW_SUBJECT_ATTR_RESOURCE,
1388             $NEW_SUBJECT_ATTR_HREF,
1389             ($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object},
1390             ) {
1391             ($current_object_resource, $current_object_resource_elem) = $code->() unless $current_object_resource;
1392             }
1393            
1394             if ($current_element->hasAttributeNsSafe($rdfans, 'typeof')
1395             or $current_element->hasAttributeNsSafe($rdfans, 'instanceof'))
1396             {
1397             if ($current_element->hasAttributeNsSafe($rdfans, 'about'))
1398             {
1399             ($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem);
1400             }
1401             elsif ($self->{options}{typeof_resources})
1402             {
1403             ($current_object_resource, $current_object_resource_elem) =
1404             ($self->bnode($current_element), $current_element)
1405             unless $current_object_resource;
1406            
1407             ($typed_resource, $typed_resource_elem) = ($current_object_resource, $current_object_resource_elem);
1408             }
1409             else
1410             {
1411             ($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem);
1412             }
1413             }
1414             }
1415            
1416             # # NOTE: x876587
1417             # if (!defined $new_subject
1418             # and $current_element->nodePath eq $self->dom->documentElement->nodePath)
1419             # {
1420             # $new_subject = $self->uri('');
1421             # $new_subject_elem = $self->dom->documentElement;
1422             # $skip_element = 1
1423             # unless $current_element->hasAttributeNsSafe($rdfans, 'property');
1424             # }
1425            
1426             # If in any of the previous steps a [typed resource] was set to a non-null
1427             # value, it is now used to provide a subject for type values
1428             if ($typed_resource
1429             && ( $current_element->hasAttributeNsSafe($rdfans, 'instanceof')
1430             || $current_element->hasAttributeNsSafe($rdfans, 'typeof')))
1431             {
1432              
1433             if ($current_element->hasAttributeNsSafe($rdfans, 'instanceof')
1434             && $current_element->hasAttributeNsSafe($rdfans, 'typeof'))
1435             {
1436             $self->_log_error(
1437             ERR_WARNING,
1438             ERR_CODE_INSTANCEOF_OVERRULED,
1439             "Deprecated \@instanceof found; ignored because \@typeof also present.",
1440             element => $current_element,
1441             );
1442             }
1443             elsif ($current_element->hasAttributeNsSafe($rdfans, 'instanceof'))
1444             {
1445             $self->_log_error(
1446             ERR_WARNING,
1447             ERR_CODE_INSTANCEOF_USED,
1448             "Deprecated \@instanceof found; using it anyway.",
1449             element => $current_element,
1450             );
1451             }
1452              
1453             # One or more 'types' for the [ new subject ] can be set by using
1454             # @instanceof. If present, the attribute must contain one or more
1455             # URIs, obtained according to the section on URI and CURIE Processing...
1456            
1457             my @instanceof = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'typeof')
1458             || $current_element->getAttributeNsSafe($rdfans, 'instanceof') );
1459            
1460             foreach my $curie (@instanceof)
1461             {
1462             my $rdftype = $self->_expand_curie(
1463             $curie,
1464             element => $current_element,
1465             attribute => 'typeof',
1466             prefixes => $local_uri_mappings,
1467             terms => $local_term_mappings,
1468             xml_base => $xml_base,
1469             );
1470             next unless defined $rdftype;
1471            
1472             # ... each of which is used to generate a triple as follows:
1473             #
1474             # subject
1475             # [new subject]
1476             # predicate
1477             # http://www.w3.org/1999/02/22-rdf-syntax-ns#type
1478             # object
1479             # full URI of 'type'
1480              
1481             my $E = { # provenance tracking
1482             current => $current_element,
1483             subject => $typed_resource_elem,
1484             predicate => $current_element,
1485             object => $current_element,
1486             graph => $graph_elem,
1487             };
1488             $self->_insert_triple_resource($E, $typed_resource, RDF_TYPE, $rdftype, $graph);
1489             $activity++;
1490             }
1491             }
1492              
1493             # EXTENSION: @longdesc
1494             if ($self->{'options'}->{'longdesc_attr'}
1495             && $current_element->hasAttributeNsSafe($rdfans, 'longdesc'))
1496             {
1497             my $longdesc = $self->uri(
1498             $current_element->getAttributeNsSafe($rdfans, 'longdesc'),
1499             {'element'=>$current_element,'xml_base'=>$hrefsrc_base}
1500             );
1501             if (defined $longdesc)
1502             {
1503             my $E = {
1504             current => $new_subject_elem,
1505             subject => $current_element,
1506             predicate => $current_element,
1507             object => $current_element,
1508             graph => $graph_elem,
1509             };
1510             $self->_insert_triple_resource($E, $new_subject, 'http://www.w3.org/2007/05/powder-s#describedby', $longdesc, $graph);
1511             }
1512             }
1513              
1514             # If in any of the previous steps a new subject was set to a non-null value
1515             # different from the parent object; The list mapping taken from the
1516             # evaluation context is set to a new, empty mapping.
1517             if (defined $new_subject
1518             and $new_subject ne $parent_subject || !%$list_mappings)
1519             {
1520             $list_mappings = {
1521             '::meta' => {
1522             id => Data::UUID->new->create_str,
1523             owner => $current_element,
1524             },
1525             };
1526             }
1527              
1528             # If in any of the previous steps a [current object resource] was set to
1529             # a non-null value, it is now used to generate triples and add entries to
1530             # the local list mapping
1531             if ($current_object_resource)
1532             {
1533             # If the element contains both the inlist and the rel attributes: the
1534             # rel may contain one or more IRIs, obtained according to the section
1535             # on CURIE and IRI Processing each of which is used to add an entry to
1536             # the list mapping as follows:
1537             if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')
1538             and $current_element->hasAttributeNsSafe($rdfans, 'rel'))
1539             {
1540             foreach my $r (@REL)
1541             {
1542             # if the local list mapping does not contain a list associated with
1543             # the IRI, instantiate a new list and add to local list mappings
1544             $list_mappings->{$r} = [] unless defined $list_mappings->{$r};
1545            
1546             # add the current object resource to the list associated with the IRI
1547             # in the local list mapping
1548             push @{ $list_mappings->{$r} }, [resource => $current_object_resource];
1549             $activity++;
1550             }
1551             }
1552            
1553             # XXX:@inlist doesn't support @rev?
1554             #
1555             # if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')
1556             # and $current_element->hasAttributeNsSafe($rdfans, 'rev'))
1557             # {
1558             # foreach my $r (@REV)
1559             # {
1560             # # if the local list mapping does not contain a list associated with
1561             # # the IRI, instantiate a new list and add to local list mappings
1562             # $list_mappings->{'REV:'.$r} = [] unless defined $list_mappings->{'REV:'.$r};
1563             #
1564             # # add the current object resource to the list associated with the IRI
1565             # # in the local list mapping
1566             # push @{ $list_mappings->{'REV:'.$r} }, [resource => $current_object_resource];
1567             # }
1568             # }
1569            
1570             my $E = { # provenance tracking
1571             current => $current_element,
1572             subject => $new_subject_elem,
1573             predicate => $current_element,
1574             object => $current_object_resource_elem,
1575             graph => $graph_elem,
1576             };
1577            
1578             # Predicates for the [ current object resource ] can be set by
1579             # using one or both of the @rel and @rev attributes, but, in
1580             # case of the @rel attribute, only if the @inlist is not present:
1581             #
1582             # * If present, @rel will contain one or more URIs, obtained
1583             # according to the section on CURIE and URI Processing each
1584             # of which is used to generate a triple as follows:
1585             #
1586             # subject
1587             # [new subject]
1588             # predicate
1589             # full URI
1590             # object
1591             # [current object resource]
1592            
1593             unless ($current_element->hasAttributeNsSafe($rdfans, 'inlist'))
1594             {
1595             foreach my $r (@REL)
1596             {
1597             $self->_insert_triple_resource($E, $new_subject, $r, $current_object_resource, $graph);
1598             $activity++;
1599             }
1600             }
1601            
1602             # * If present, @rev will contain one or more URIs, obtained
1603             # according to the section on CURIE and URI Processing each
1604             # of which is used to generate a triple as follows:
1605             #
1606             # subject
1607             # [current object resource]
1608             # predicate
1609             # full URI
1610             # object
1611             # [new subject]
1612            
1613             $E = { # provenance tracking
1614             current => $current_element,
1615             subject => $current_object_resource_elem,
1616             predicate => $current_element,
1617             object => $new_subject_elem,
1618             graph => $graph_elem,
1619             };
1620             foreach my $r (@REV)
1621             {
1622             $self->_insert_triple_resource($E, $current_object_resource, $r, $new_subject, $graph);
1623             $activity++;
1624             }
1625             }
1626            
1627             # If however [current object resource] was set to null, but there are
1628             # predicates present, then they must be stored as [incomplete triple]s,
1629             # pending the discovery of a subject that can be used as the object. Also,
1630             # [current object resource] should be set to a newly created [bnode]
1631             elsif ((scalar @REL) || (scalar @REV))
1632             {
1633             # Predicates for [incomplete triple]s can be set by using one or
1634             # both of the @rel and @rev attributes:
1635             #
1636             # * If present, @rel must contain one or more URIs, obtained
1637             # according to the section on CURIE and URI Processing each
1638             # of which is added to the [local list of incomplete triples]
1639             # as follows:
1640             #
1641             # predicate
1642             # full URI
1643             # direction
1644             # forward
1645            
1646             push @$local_incomplete_triples,
1647             map {
1648             $current_element->hasAttributeNsSafe($rdfans, 'inlist')
1649             ?{
1650             list => do { $list_mappings->{$_} = [] unless defined $list_mappings->{$_}; $list_mappings->{$_} },
1651             direction => 'none',
1652             }
1653             :{
1654             predicate => $_,
1655             direction => 'forward',
1656             graph => $graph,
1657             predicate_element => $current_element,
1658             graph_element => $graph_elem,
1659             }
1660             } @REL;
1661            
1662             # * If present, @rev must contain one or more URIs, obtained
1663             # according to the section on CURIE and URI Processing, each
1664             # of which is added to the [local list of incomplete triples]
1665             # as follows:
1666             #
1667             # predicate
1668             # full URI
1669             # direction
1670             # reverse
1671            
1672             push @$local_incomplete_triples,
1673             map {
1674             # $current_element->hasAttributeNsSafe($rdfans, 'inlist')
1675             # ?{
1676             # list => do { $list_mappings->{'REV:'.$_} = [] unless defined $list_mappings->{'REV:'.$_}; $list_mappings->{'REV:'.$_}; },
1677             # direction => 'none',
1678             # }
1679             # :{
1680             +{
1681             predicate => $_,
1682             direction => 'reverse',
1683             graph => $graph,
1684             predicate_element => $current_element,
1685             graph_element => $graph_elem,
1686             }
1687             } @REV;
1688            
1689             $current_object_resource = $self->bnode;
1690             $current_object_resource_elem = $current_element;
1691             }
1692              
1693             # The next step of the iteration is to establish any [current
1694             # property value]
1695             my @current_property_value;
1696            
1697             my @prop = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'property') );
1698              
1699             my $has_datatype = 0;
1700             my $datatype = undef;
1701             if ($current_element->hasAttributeNsSafe($rdfans, 'datatype'))
1702             {
1703             $has_datatype = 1;
1704             $datatype = $self->_expand_curie(
1705             $current_element->getAttributeNsSafe($rdfans, 'datatype'),
1706             element => $current_element,
1707             attribute => 'datatype',
1708             prefixes => $local_uri_mappings,
1709             terms => $local_term_mappings,
1710             xml_base => $xml_base,
1711             );
1712             }
1713            
1714             if (@prop)
1715             {
1716             # Predicates for the [current object literal] can be set by using
1717             # @property. If present, one or more URIs are obtained according
1718             # to the section on CURIE and URI Processing and then the actual
1719             # literal value is obtained as follows:
1720            
1721             # HTML+RDFa
1722             if ($self->{options}{datetime_attr}
1723             and (
1724             $current_element->hasAttributeNsSafe($rdfans, 'datetime')
1725             or $current_element->namespaceURI eq 'http://www.w3.org/1999/xhtml'
1726             && lc($current_element->tagName) eq 'time'
1727             )) {
1728             @current_property_value = (
1729             $current_element->hasAttributeNsSafe($rdfans, 'datetime')
1730             ? $current_element->getAttributeNsSafe($rdfans, 'datetime')
1731             : $self->_element_to_string($current_element)
1732             );
1733            
1734             push @current_property_value, do
1735             {
1736             local $_ = $current_property_value[0];
1737            
1738             if (!!$has_datatype == !!1)
1739             { $datatype }
1740             elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1741             { 'http://www.w3.org/2001/XMLSchema#dateTime' }
1742             elsif (/^(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1743             { 'http://www.w3.org/2001/XMLSchema#time' }
1744             elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1745             { 'http://www.w3.org/2001/XMLSchema#date' }
1746             elsif (/^(\-?\d{4,})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1747             { 'http://www.w3.org/2001/XMLSchema#gYearMonth' } # XXX: not in spec!
1748             elsif (/^(\-?\d{4,})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1749             { 'http://www.w3.org/2001/XMLSchema#gYear' } # XXX: not in spec!
1750             elsif (/^--(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1751             { 'http://www.w3.org/2001/XMLSchema#gMonthDay' } # XXX: not in spec!
1752             elsif (/^---(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1753             { 'http://www.w3.org/2001/XMLSchema#gDay' } # XXX: not in spec!
1754             elsif (/^--(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i)
1755             { 'http://www.w3.org/2001/XMLSchema#gMonth' } # XXX: not in spec!
1756             elsif (/^P([\d\.]+Y)?([\d\.]+M)?([\d\.]+D)?(T([\d\.]+H)?([\d\.]+M)?([\d\.]+S)?)?$/i)
1757             { 'http://www.w3.org/2001/XMLSchema#duration' }
1758             else
1759             { undef }
1760             }, $current_language;
1761             }
1762              
1763             # HTML+RDFa
1764             elsif ($self->{options}{value_attr}
1765             and $current_element->hasAttributeNsSafe($rdfans, 'value'))
1766             {
1767             @current_property_value = (
1768             $current_element->getAttributeNsSafe($rdfans, 'value'),
1769             ($has_datatype ? $datatype : undef),
1770             $current_language,
1771             );
1772             }
1773              
1774             # as a [ plain literal ] if:
1775             #
1776             # @content is present;
1777             elsif ($current_element->hasAttributeNsSafe($rdfans, 'content'))
1778             {
1779             @current_property_value = (
1780             $current_element->getAttributeNsSafe($rdfans, 'content'),
1781             ($has_datatype ? $datatype : undef),
1782             $current_language,
1783             );
1784             }
1785            
1786             # OpenDocument 1.2 extension
1787             elsif (defined $self->{options}{bookmark_end}
1788             and defined $self->{options}{bookmark_name}
1789             and sprintf('{%s}%s', $current_element->namespaceURI, $current_element->localname)
1790             ~~ ['{}'.$self->{options}{bookmark_start}, $self->{options}{bookmark_start}]
1791             ) {
1792             @current_property_value = (
1793             $self->_element_to_bookmarked_string($current_element),
1794             ($has_datatype ? $datatype: undef),
1795             $current_language,
1796             );
1797             }
1798            
1799             # Additionally, if there is a value for [current language] then
1800             # the value of the [plain literal] should include this language
1801             # information, as described in [RDF-CONCEPTS]. The actual literal
1802             # is either the value of @content (if present) or a string created
1803             # by concatenating the text content of each of the descendant
1804             # elements of the [current element] in document order.
1805            
1806             # or all children of the [current element] are text nodes;
1807             # or there are no child nodes;
1808             # or the body of the [ current element ] does have non-text
1809             # child nodes but @datatype is present, with an empty value.
1810             elsif ($has_datatype and $datatype eq '')
1811             {
1812             @current_property_value = (
1813             $self->_element_to_string($current_element),
1814             ($has_datatype ? $datatype: undef),
1815             $current_language,
1816             );
1817             }
1818              
1819             # as an [XML literal] if: explicitly rdf:XMLLiteral.
1820             elsif ($datatype eq RDF_XMLLIT)
1821             {
1822             @current_property_value = (
1823             $self->_element_to_xml($current_element, $current_language),
1824             RDF_XMLLIT,
1825             $current_language,
1826             );
1827             $recurse = $self->{options}{xmllit_recurse};
1828             }
1829            
1830             # as a [typed literal] if:
1831             #
1832             # * @datatype is present, and does not have an empty value.
1833             #
1834             # The actual literal is either the value of @content (if present)
1835             # or a string created by concatenating the value of all descendant
1836             # text nodes, of the [current element] in turn. The final string
1837             # includes the datatype URI, as described in [RDF-CONCEPTS], which
1838             # will have been obtained according to the section on CURIE and URI
1839             # Processing.
1840             elsif ($has_datatype)
1841             {
1842             if ($current_element->hasAttributeNsSafe($rdfans, 'content'))
1843             {
1844             @current_property_value = (
1845             $current_element->getAttributeNsSafe($rdfans, 'content'),
1846             $datatype,
1847             $current_language,
1848             );
1849             }
1850             else
1851             {
1852             @current_property_value = (
1853             $self->_element_to_string($current_element),
1854             $datatype,
1855             $current_language,
1856             );
1857             }
1858             }
1859            
1860             elsif ($self->{options}{property_resources}
1861             and !$current_element->hasAttributeNsSafe($rdfans, 'datatype')
1862             and !$current_element->hasAttributeNsSafe($rdfans, 'content')
1863             and !$current_element->hasAttributeNsSafe($rdfans, 'rel')
1864             and !$current_element->hasAttributeNsSafe($rdfans, 'rev')
1865             and (
1866             $current_element->hasAttributeNsSafe($rdfans, 'resource')
1867             or $current_element->hasAttributeNsSafe($rdfans, 'href')
1868             or $current_element->hasAttributeNsSafe($rdfans, 'src')
1869             && $self->{options}{src_sets_object}
1870             ))
1871             {
1872             my $resource;
1873             foreach my $attr (qw(resource href src))
1874             {
1875             next unless $current_element->hasAttributeNsSafe($rdfans, $attr);
1876             $resource = $self->_expand_curie(
1877             $current_element->getAttributeNsSafe($rdfans, $attr),
1878             element => $current_element,
1879             attribute => $attr,
1880             prefixes => $local_uri_mappings,
1881             terms => $local_term_mappings,
1882             xml_base => $xml_base,
1883             );
1884             last if defined $resource;
1885             }
1886             @current_property_value = ([ $resource ]) if defined $resource;
1887             }
1888            
1889             elsif ($self->{options}{property_resources}
1890             and defined $typed_resource
1891             and $current_element->hasAttributeNsSafe($rdfans, 'typeof')
1892             and !$current_element->hasAttributeNsSafe($rdfans, 'about'))
1893             {
1894             @current_property_value = ([ $typed_resource ]);
1895             }
1896              
1897             # or all children of the [current element] are text nodes;
1898             # or there are no child nodes;
1899             # or the body of the [ current element ] does have non-text
1900             # child nodes but @datatype is present, with an empty value.
1901             elsif (not $current_element->getElementsByTagName('*'))
1902             {
1903             @current_property_value = (
1904             $self->_element_to_string($current_element),
1905             ($has_datatype ? $datatype: undef),
1906             $current_language,
1907             );
1908             }
1909              
1910             # In RDFa 1.0 by default generate an XML Literal;
1911             # in RDFa 1.1 by default generate a plain literal.
1912             elsif (!$has_datatype and $current_element->getElementsByTagName('*'))
1913             {
1914             if ($self->{options}{xmllit_default})
1915             {
1916             @current_property_value = ($self->_element_to_xml($current_element, $current_language),
1917             RDF_XMLLIT,
1918             $current_language);
1919             $recurse = $self->{options}{xmllit_recurse};
1920             }
1921             else
1922             {
1923             @current_property_value = ($self->_element_to_string($current_element),
1924             undef,
1925             $current_language);
1926             }
1927             }
1928              
1929             else
1930             {
1931             die("How did we get here??\n");
1932             }
1933             }
1934            
1935             my $E = { # provenance tracking
1936             current => $current_element,
1937             subject => $new_subject_elem,
1938             predicate => $current_element,
1939             object => $current_element,
1940             graph => $graph_elem,
1941             };
1942             foreach my $property (@prop)
1943             {
1944             next unless defined $current_property_value[0];
1945            
1946             # The [current property value] is then used with each predicate to
1947             # generate a triple as follows:
1948             #
1949             # subject
1950             # [new subject]
1951             # predicate
1952             # full URI
1953             # object
1954             # [current object literal]
1955              
1956             my $p = $self->_expand_curie(
1957             $property,
1958             element => $current_element,
1959             attribute => 'property',
1960             prefixes => $local_uri_mappings,
1961             terms => $local_term_mappings,
1962             xml_base => $xml_base,
1963             );
1964             next unless defined $p;
1965            
1966             if (ref $current_property_value[0] eq 'ARRAY')
1967             {
1968             if ($current_element->hasAttributeNsSafe($rdfans, 'inlist'))
1969             {
1970             $list_mappings->{$p} = [] unless defined $list_mappings->{$p};
1971             push @{ $list_mappings->{$p} }, [resource => $current_property_value[0][0]];
1972             }
1973             else
1974             {
1975             $self->_insert_triple_resource($E, $new_subject, $p, $current_property_value[0][0], $graph);
1976             $activity++;
1977             }
1978             }
1979             else
1980             {
1981             if ($current_element->hasAttributeNsSafe($rdfans, 'inlist'))
1982             {
1983             $list_mappings->{$p} = [] unless defined $list_mappings->{$p};
1984             push @{ $list_mappings->{$p} }, [literal => @current_property_value];
1985             }
1986             else
1987             {
1988             $self->_insert_triple_literal($E, $new_subject, $p, @current_property_value, $graph);
1989             $activity++;
1990             }
1991             }
1992             # Once the triple has been created, if the [datatype] of the
1993             # [current object literal] is rdf:XMLLiteral, then the [recurse]
1994             # flag is set to false.
1995             # $recurse = 0
1996             # if $datatype eq RDF_XMLLIT;
1997             }
1998              
1999             # # If the [skip element] flag is 'false', and either: the previous step
2000             # # resulted in a 'true' flag, or [new subject] was set to a non-null and
2001             # # non-bnode value, then any [incomplete triple]s within the current context
2002             # # should be completed:
2003             # if (!$skip_element && ($flag || ((defined $new_subject) && ($new_subject !~ /^bnodeXXX:/))))
2004             # {
2005              
2006             if (!$skip_element && defined $new_subject)
2007             {
2008             # Loop through list of incomplete triples...
2009             foreach my $it (@$incomplete_triples)
2010             {
2011             my $direction = $it->{direction};
2012             my $predicate = $it->{predicate};
2013             my $parent_graph = $it->{graph};
2014              
2015             if ($direction eq 'none' and defined $it->{list})
2016             {
2017             push @{$it->{list}}, [resource => $new_subject];
2018             }
2019             elsif ($direction eq 'forward')
2020             {
2021             my $E = { # provenance tracking
2022             current => $current_element,
2023             subject => $parent_subject_elem,
2024             predicate => $it->{predicate_element},
2025             object => $new_subject_elem,
2026             graph => $it->{graph_element},
2027             };
2028              
2029             $self->_insert_triple_resource($E, $parent_subject, $predicate, $new_subject, $parent_graph);
2030             $activity++;
2031             }
2032             elsif ($direction eq 'reverse')
2033             {
2034             my $E = { # provenance tracking
2035             current => $current_element,
2036             subject => $new_subject_elem,
2037             predicate => $it->{predicate_element},
2038             object => $parent_subject_elem,
2039             graph => $it->{graph_element},
2040             };
2041            
2042             $self->_insert_triple_resource($E, $new_subject, $predicate, $parent_subject, $parent_graph);
2043             $activity++;
2044             }
2045             else
2046             {
2047             die "Direction is '$direction'??";
2048             }
2049             }
2050             }
2051              
2052             # If the [recurse] flag is 'true', all elements that are children of the
2053             # [current element] are processed using the rules described here, using a
2054             # new [evaluation context], initialized as follows
2055             my $flag = 0;
2056             if ($recurse)
2057             {
2058             my $evaluation_context;
2059            
2060             # If the [skip element] flag is 'true' then the new [evaluation context]
2061             # is a copy of the current context that was passed in to this level of
2062             # processing, with the [language] and [list of URI mappings] values
2063             # replaced with the local values;
2064             if ($skip_element)
2065             {
2066             $evaluation_context = {
2067             %$args,
2068             base => $base,
2069             language => $current_language,
2070             uri_mappings => $uri_mappings,
2071             term_mappings => $term_mappings,
2072             list_mappings => $list_mappings,
2073             # parent_subject => $parent_subject,
2074             # parent_subject_elem => $parent_subject_elem,
2075             # parent_object => $parent_object,
2076             # parent_object_elem => $parent_object_elem,
2077             # incomplete_triples => $incomplete_triples,
2078             graph => $graph,
2079             graph_elem => $graph_elem,
2080             xml_base => $xml_base,
2081             parent => $args,
2082             };
2083             }
2084            
2085             # Otherwise, the values are:
2086             else
2087             {
2088             $evaluation_context = {
2089             base => $base,
2090             parent_subject => $new_subject,
2091             parent_subject_elem => $new_subject_elem,
2092             parent_object => (defined $current_object_resource ? $current_object_resource : (defined $new_subject ? $new_subject : $parent_subject)),
2093             parent_object_elem => (defined $current_object_resource_elem ? $current_object_resource_elem : (defined $new_subject_elem ? $new_subject_elem : $parent_subject_elem)),
2094             uri_mappings => $local_uri_mappings,
2095             term_mappings => $local_term_mappings,
2096             incomplete_triples => $local_incomplete_triples,
2097             list_mappings => $list_mappings,
2098             language => $current_language,
2099             graph => $graph,
2100             graph_elem => $graph_elem,
2101             xml_base => $xml_base,
2102             parent => $args,
2103             };
2104             }
2105            
2106             foreach my $kid ($current_element->getChildrenByTagName('*'))
2107             {
2108             $flag = $self->_consume_element($kid, $evaluation_context) || $flag;
2109             }
2110             }
2111              
2112             # Once all the child elements have been traversed, list triples are
2113             # generated, if necessary.
2114             if ($list_mappings->{'::meta'}{owner} == $current_element)
2115             {
2116             foreach my $iri (keys %$list_mappings)
2117             {
2118             next if $iri eq '::meta';
2119            
2120             # For each IRI in the local list mapping, if the equivalent list does
2121             # not exist in the evaluation context, indicating that the list was
2122             # originally defined on the current element, use the list as follows:
2123             if ($args->{list_mappings}{$iri} == $list_mappings->{$iri}
2124             and ref $args->{list_mappings}{$iri} eq 'HASH'
2125             and %{ $args->{list_mappings}{$iri} })
2126             {
2127             next;
2128             }
2129            
2130             # Create a new 'bnode' array containing newly created bnodes, one for
2131             # each element in the list
2132             my @bnode = map { $self->bnode; } @{ $list_mappings->{$iri} };
2133             my $first = @bnode ? $bnode[0] : undef;
2134            
2135             while (my $bnode = shift @bnode)
2136             {
2137             my $value = shift @{ $list_mappings->{$iri} };
2138             my $type = shift @$value;
2139            
2140             my $E = { # provenance tracking
2141             current => $current_element,
2142             graph => $graph_elem,
2143             };
2144             if ($type eq 'literal')
2145             {
2146             $self->_insert_triple_literal($E, $bnode, RDF_FIRST, @$value, $graph);
2147             }
2148             else
2149             {
2150             $self->_insert_triple_resource($E, $bnode, RDF_FIRST, @$value, $graph);
2151             }
2152              
2153             if (exists $bnode[0])
2154             {
2155             $self->_insert_triple_resource($E, $bnode, RDF_REST, $bnode[0], $graph);
2156             }
2157             else
2158             {
2159             $self->_insert_triple_resource($E, $bnode, RDF_REST, RDF_NIL, $graph);
2160             }
2161             }
2162            
2163             my $E = { # provenance tracking
2164             current => $current_element,
2165             subject => $new_subject_elem,
2166             predicate => $current_element,
2167             graph => $graph_elem,
2168             };
2169            
2170             #my ($attr, $iri) = split /:/, $iri, 2;
2171             my $attr = 'REL';
2172            
2173             if (defined $first)
2174             {
2175             $attr eq 'REV'
2176             ? $self->_insert_triple_resource($E, $first, $iri, $new_subject, $graph)
2177             : $self->_insert_triple_resource($E, $new_subject, $iri, $first, $graph);
2178             }
2179             else
2180             {
2181             $attr eq 'REV'
2182             ? $self->_insert_triple_resource($E, RDF_NIL, $iri, $new_subject, $graph)
2183             : $self->_insert_triple_resource($E, $new_subject, $iri, RDF_NIL, $graph);
2184             }
2185            
2186             $activity++;
2187             }
2188             }
2189            
2190             return 1 if $activity || $new_subject || $flag;
2191             return 0;
2192             }
2193              
sub set_callbacks
# Installs the set of triple-handling callbacks on this object.
#
# Expects exactly one HASH reference. The reference itself (not a copy) is
# stored in $self->{sub}. For the 'pretriple_resource' and
# 'pretriple_literal' entries, the string 'print' (compared
# case-insensitively) is shorthand for the built-in printers _print0 and
# _print1 respectively; the shorthand is replaced inside the supplied hash.
#
# Dies with "Unsupported set_callbacks call.\n" when the argument is not a
# plain HASH reference. Returns $self.
{
	my ($self, $callbacks) = @_;

	die "Unsupported set_callbacks call.\n"
		unless ref($callbacks) eq 'HASH';

	$self->{sub} = $callbacks;

	# Built-in printer for each slot that understands the 'print' shorthand.
	my %builtin_printer = (
		pretriple_resource => \&_print0,
		pretriple_literal  => \&_print1,
	);

	foreach my $slot (qw(pretriple_resource pretriple_literal))
	{
		my $requested = lc($callbacks->{$slot} || '');
		$callbacks->{$slot} = $builtin_printer{$slot}
			if $requested eq 'print';
	}

	return $self;
}
2214              
sub _print0
# Prints a resource-valued triple as Turtle on the currently selected
# output handle, preceded by comment lines naming the graph (if any) and
# the element the triple came from (if any).
#
# Arguments: parser, element (or false), subject, predicate, object, graph.
# Subject and object are printed verbatim when they look like blank nodes
# (start with "_:") and wrapped in angle brackets otherwise.
{
    my ($self, $element, $subject, $pred, $object, $graph) = @_;

    print "# GRAPH $graph\n" if $graph;

    my $origin = "# Triple.\n";
    $origin = sprintf("# Triple on element %s.\n", $element->nodePath)
        if $element;
    print $origin;

    my ($s_term, $o_term) =
        map { /^_:/ ? $_ : "<$_>" } ($subject, $object);
    print "$s_term <$pred> $o_term .\n";

    return;
}
2245              
sub _print1
# Prints a literal-valued triple as Turtle on the currently selected output
# handle, preceded by comment lines naming the graph (if any) and the
# element the triple came from (if any).
#
# Arguments: parser, element (or false), subject, predicate, literal text,
# datatype, language, graph. A non-empty datatype is emitted as ^^<dt>;
# the language tag is only emitted when there is no datatype.
{
    my ($self, $element, $subject, $pred, $object, $dt, $lang, $graph) = @_;

    # Escape the characters that cannot appear raw in a quoted Turtle
    # string. One pass over the text gives the same result as escaping the
    # backslash first and the remaining characters afterwards.
    my %escaped = (
        "\\" => "\\\\",
        "\n" => "\\n",
        "\r" => "\\r",
        "\t" => "\\t",
        "\"" => "\\\"",
    );
    $object =~ s/([\\\n\r\t"])/$escaped{$1}/g;

    print "# GRAPH $graph\n" if $graph;

    my $origin = "# Triple.\n";
    $origin = sprintf("# Triple on element %s.\n", $element->nodePath)
        if $element;
    print $origin;

    my $s_term = ($subject =~ /^_:/) ? $subject : "<$subject>";

    my $suffix = '';
    if (length($dt))
    {
        $suffix = "^^<$dt>";
    }
    elsif (length($lang))
    {
        $suffix = "\@$lang";
    }

    print "$s_term <$pred> \"$object\"$suffix .\n";

    return;
}
2288              
sub element_subjects
# Combined getter/setter for $self->{element_subjects}.
#
# Always calls $self->consume first. When an argument is supplied it
# replaces the stored value; the (possibly new) value is returned.
{
    my ($self, @replacement) = @_;

    $self->consume;

    if (@replacement)
    {
        $self->{element_subjects} = $replacement[0];
    }

    return $self->{element_subjects};
}
2296              
sub _insert_triple_resource
# Adds a triple whose object is a resource (URI or blank node).
#
# Arguments:
#   $element   - provenance record for the triple; when it is a reference,
#                its {current} entry is what the callback receives
#   $subject   - subject URI or bnode
#   $predicate - predicate URI
#   $object    - resource URI or bnode
#   $graph     - graph URI or bnode (if named graphs feature is enabled)
#
# If a 'pretriple_resource' callback is set it is called first; a true
# return value suppresses the triple and this method returns nothing.
# Otherwise returns whatever _insert_triple_common returns.
{
    my ($self, $element, $subject, $predicate, $object, $graph) = @_;

    my $pretriple = $self->{'sub'}->{'pretriple_resource'};
    if (defined $pretriple)
    {
        my $current = ref $element ? $element->{current} : undef;
        my $suppress_triple =
            $pretriple->($self, $current, $subject, $predicate, $object, $graph);
        return if $suppress_triple;
    }

    # Objects written as "_:label" become blank nodes; everything else is
    # treated as a resource URI.
    my $to = ($object =~ m/^_:(.*)/)
        ? RDF::Trine::Node::Blank->new($1)
        : RDF::Trine::Node::Resource->new($object);

    return $self->_insert_triple_common($element, $subject, $predicate, $to, $graph);
}
2333              
sub _insert_triple_literal
# Adds a triple whose object is a literal.
#
# Arguments:
#   $element   - provenance record for the triple; when it is a reference,
#                its {current} entry is what the callback receives
#   $subject   - subject URI or bnode
#   $predicate - predicate URI
#   $object    - literal text
#   $datatype  - datatype URI (possibly undef or '')
#   $language  - language (possibly undef or '')
#   $graph     - graph URI or bnode (if named graphs feature is enabled)
#
# If a 'pretriple_literal' callback is set it is called first; a true
# return value suppresses the triple and this method returns nothing.
# Otherwise returns whatever _insert_triple_common returns.
#
# The language is only attached when $datatype is undef; any defined
# datatype (including '') produces a datatyped literal without language.
{
    my ($self, $element, $subject, $predicate, $object, $datatype, $language, $graph) = @_;

    my $pretriple = $self->{'sub'}->{'pretriple_literal'};
    if (defined $pretriple)
    {
        my $current = ref $element ? $element->{current} : undef;
        my $suppress_triple = $pretriple->(
            $self, $current, $subject, $predicate,
            $object, $datatype, $language, $graph,
        );
        return if $suppress_triple;
    }

    # Now we know there's a literal
    my $to;

    if (!defined $datatype)
    {
        $to = RDF::Trine::Node::Literal->new($object, $language, undef);
    }
    elsif ($datatype eq RDF_XMLLIT)
    {
        if ($self->{options}{use_rtnlx})
        {
            # Best effort: if the optional module is missing or its
            # constructor dies, $to stays undef and the plain literal
            # below is used instead.
            # NOTE(review): $element is dereferenced as a hash above, so
            # calling ->childNodes on it looks like it can never succeed;
            # presumably $element->{current} was intended. Confirm what
            # RDF::Trine::Node::Literal::XML->new expects before changing.
            $to = eval
            {
                require RDF::Trine::Node::Literal::XML;
                RDF::Trine::Node::Literal::XML->new($element->childNodes);
            };
        }

        unless (defined $to)
        {
            # local restores the package global even if the constructor
            # throws, which a manual save/restore does not.
            local $RDF::Trine::Node::Literal::USE_XMLLITERALS = 0;
            $to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
        }
    }
    else
    {
        $to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
    }

    # Run the common function
    return $self->_insert_triple_common($element, $subject, $predicate, $to, $graph);
}
2400              
sub _insert_triple_common
# Builds an RDF::Trine statement and adds it to $self->{model}.
#
# Arguments:
#   $element   - provenance record, passed through to the 'ontriple' callback
#   $subject   - subject URI or bnode ("_:label")
#   $predicate - predicate URI
#   $to        - object, already an RDF::Trine node
#   $graph     - graph URI or bnode (if named graphs feature is enabled)
#
# A quad is built when the 'graph' option is on and $graph is true (the
# graph name is also counted in $self->{Graphs}); otherwise a plain triple.
# If an 'ontriple' callback is set and returns true, the statement is not
# added and nothing is returned.
{
    my ($self, $element, $subject, $predicate, $to, $graph) = @_;

    # Identifiers written as "_:label" are blank nodes, the rest are URIs.
    my $node_for = sub
    {
        my ($identifier) = @_;
        if ($identifier =~ m/^_:(.*)/)
        {
            return RDF::Trine::Node::Blank->new($1);
        }
        return RDF::Trine::Node::Resource->new($identifier);
    };

    my $tp = RDF::Trine::Node::Resource->new($predicate);
    my $ts = $node_for->($subject);

    my $statement;
    if ($self->{'options'}->{'graph'} && $graph)
    {
        $self->{Graphs}->{$graph}++;

        my $tg = $node_for->($graph);
        $statement = RDF::Trine::Statement::Quad->new($ts, $tp, $to, $tg);
    }
    else
    {
        $statement = RDF::Trine::Statement->new($ts, $tp, $to);
    }

    my $ontriple = $self->{'sub'}->{'ontriple'};
    if ($ontriple)
    {
        my $suppress_triple = $ontriple->($self, $element, $statement);
        return if $suppress_triple;
    }

    return $self->{model}->add_statement($statement);
}
2454              
sub _atom_magic
# Returns the blank node for $element, asking bnode() to remember it so
# that later requests for the same element yield the same identifier.
{
    my ($self, $element) = @_;

    my $save_me = 1;
    return $self->bnode($element, $save_me);
}
2462              
# Splits things like property="foaf:name rdfs:label"
#
# Returns the whitespace-separated tokens of $string as a list (their
# count in scalar context). Leading and trailing whitespace is ignored;
# undef or an all-whitespace string yields an empty list.
sub _split_tokens
{
    my ($self, $string) = @_;

    # Only undef is defaulted: the string "0" is a real token and must
    # not be discarded.
    $string = '' unless defined $string;

    # A pattern of ' ' skips leading whitespace and splits on runs of
    # whitespace, so no separate trimming step is needed.
    my @return = split ' ', $string;
    return @return;
}
2472              
sub _element_to_bookmarked_string
# Collects the text that follows a bookmark-start element, in document
# order, up to the matching bookmark-end element.
#
# The 'bookmark_name' and 'bookmark_end' options name the identifying
# attribute and the end element; each may be written "{namespace}local"
# or just "local". The end element matches when its local name and
# namespace agree with 'bookmark_end' and its name attribute equals that
# of $bookmark. Returns the concatenated text-node data (possibly '').
{
    my ($self, $bookmark) = @_;

    # Splits "{ns}local" into (ns-or-undef, local); a plain name has no
    # namespace.
    my $split_name = sub
    {
        my ($name) = @_;
        if ($name =~ /^\{(.*)\}(.+)$/)
        {
            return (($1 || undef), $2);
        }
        return (undef, $name);
    };

    my @name_attribute = $split_name->($self->{'options'}->{'bookmark_name'});
    my ($endtag_namespace, $endtag_localname) =
        $split_name->($self->{'options'}->{'bookmark_end'});

    my $string = '';
    my $current = $bookmark;

    NODE: while ($current)
    {
        $current = $self->_find_next_node($current);
        last NODE unless defined $current;

        if ($current->nodeType == XML_TEXT_NODE)
        {
            $string .= $current->getData;
        }

        # Stop at the end marker that belongs to this bookmark.
        last NODE
            if $current->nodeType == XML_ELEMENT_NODE
            && $current->localname eq $endtag_localname
            && $current->namespaceURI eq $endtag_namespace
            && $current->getAttributeNsSafe(@name_attribute) eq $bookmark->getAttributeNsSafe(@name_attribute);
    }

    return $string;
}
2520              
sub _find_next_node
# Returns the node that follows $node in document order: its first child
# if it is an element with children, otherwise the next sibling of the
# nearest node on the ancestor chain (starting with $node itself) that has
# one. Returns undef when there is no such node.
{
    my ($self, $node) = @_;

    if ($node->nodeType == XML_ELEMENT_NODE)
    {
        my @children = $node->childNodes;
        return $children[0] if scalar @children;
    }

    # Climb towards the root until some node has a following sibling.
    for (my $cursor = $node; $cursor; $cursor = $cursor->parentNode)
    {
        my $sibling = $cursor->nextSibling;
        return $sibling if $sibling;
    }

    return undef;
}
2540              
sub _element_to_string
# Returns the text content of $dom: the data of a text node, the
# concatenated text of all descendants for an element, and '' for any
# other kind of node.
{
    my ($self, $dom) = @_;

    my $type = $dom->nodeType;

    return $dom->getData if $type == XML_TEXT_NODE;
    return ''            unless $type == XML_ELEMENT_NODE;

    return join '',
        map { scalar $self->_element_to_string($_) } $dom->childNodes;
}
2560              
sub _element_to_xml
# Serialises the children of $dom using exclusive canonical XML
# (toStringEC14N) and returns the concatenation, or undef when $dom has no
# children.
#
# When $lang is defined, child elements without their own xml:lang
# attribute are given one for the duration of the serialisation only; the
# attribute is removed again afterwards.
{
    my ($self, $dom, $lang) = @_;
    my $xml;

    for my $child ($dom->childNodes)
    {
        my $borrowed_lang =
               $child->nodeType == XML_ELEMENT_NODE
            && defined $lang
            && !$child->hasAttributeNS(XML_XML_NS, 'lang');

        $child->setAttributeNS(XML_XML_NS, 'lang', $lang)
            if $borrowed_lang;

        $xml .= $child->toStringEC14N(1);

        $child->removeAttributeNS(XML_XML_NS, 'lang')
            if $borrowed_lang;
    }

    return $xml;
}
2590              
sub bnode
# Returns an identifier for a blank node, minting a new one when needed.
#
# Arguments (all optional):
#   $element - element the node belongs to; used as a lookup key via its
#              nodePath
#   $save_me - when true (and $element is defined) the minted identifier
#              is remembered for that element
#   $ident   - label (e.g. "_:foo"); a minted identifier is always
#              remembered under a true label
#
# Lookup order: identifier saved for $element, then identifier saved for
# $ident. With the 'tdb_service' option and an element carrying a
# non-empty id attribute, a thing-described-by.org URI is returned instead
# (and not saved). Otherwise a fresh identifier is built from a
# per-parser UUID prefix and a counter: a tag: URI under the 'skolemize'
# option, a "_:" label otherwise.
{
    my ($self, $element, $save_me, $ident) = @_;
    $save_me ||= 0;
    $ident = undef unless $ident;

    if (defined $element)
    {
        my $known = $self->{'saved_bnodes'}->{ $element->nodePath };
        return $known if $known;
    }

    if (defined $ident)
    {
        my $known = $self->{'saved_bnodes'}->{ $ident };
        return $known if $known;
    }

    if ($self->{options}->{tdb_service} && $element)
    {
        # Use the id of the element that was passed in -- the same one
        # the condition inspects.
        my $id = $element->getAttribute('id');
        return sprintf('http://thing-described-by.org/?%s#%s', $self->uri, $id)
            if defined $id && length $id;
    }

    unless (defined $self->{bnode_prefix})
    {
        $self->{bnode_prefix} = Data::UUID->new->create_str;
        $self->{bnode_prefix} =~ s/-//g;
    }

    my $rv;
    if ($self->{options}->{skolemize})
    {
        $rv = sprintf('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:skolem:%s:%04d', $self->{bnode_prefix}, $self->{bnodes}++);
    }
    else
    {
        $rv = sprintf('_:rdfa%snode%04d', $self->{bnode_prefix}, $self->{bnodes}++);
    }

    if ($save_me and defined $element)
    {
        $self->{'saved_bnodes'}->{ $element->nodePath } = $rv;
    }

    if (defined $ident)
    {
        $self->{'saved_bnodes'}->{ $ident } = $rv;
    }

    return $rv;
}
2643              
sub _valid_lang
# Checks whether $value_to_test is a well-formed language tag.
#
# Returns 1 for the empty string, 0 for undef, and otherwise the result of
# matching the value (case-insensitively) against a regular expression for
# RFC 4646 well-formed tags:
#   http://www.rfc-editor.org/rfc/rfc4646.txt
#   http://tools.ietf.org/html/draft-ietf-ltru-4646bis-21
#
# The check is lenient about the separator: both "-" and "_" are accepted.
{
    my ($self, $value_to_test) = @_;

    return 0 unless defined $value_to_test;
    return 1 if $value_to_test eq '';

    # All fragments below are used under /x (whitespace is insignificant)
    # and /i (needed for the grandfathered codes, among others).

    my $s = '[_-]';   # separator -- lenient parsers will use [_-] -- strict will use [-]

    # Primary language subtag, optionally followed by one extlang subtag.
    # This must be a double-quoted string so that the separator is
    # interpolated here; left in single quotes, the "$s" would reach the
    # regex engine as an end-of-string anchor followed by a literal "s"
    # and the extlang form could never match.
    my $language = "(?: [a-z]{2,8} | [a-z]{2,3} ${s} [a-z]{3} )";

    my $script     = '[a-z]{4}';                                      # 4ALPHA
    my $region     = '(?: [a-z]{2}|[0-9]{3})';                        # 2ALPHA / 3DIGIT
    my $variant    = '(?: [a-z0-9]{5,8} | [0-9] [a-z0-9]{3} )';       # 5*8alphanum / (DIGIT 3alphanum)
    my $extension  = '(?: [a-wyz] (?: [_-] [a-z0-9]{2,8} )+ )';       # singleton 1*("-" (2*8alphanum))
    my $privateUse = '(?: x (?: [_-] [a-z0-9]{1,8} )+ )';             # "x" 1*("-" (1*8alphanum))

    # Grandfathered codes that would not otherwise pass.
    # http://www.iana.org/assignments/language-subtag-registry
    my $grandfathered = '(?:
        (en [_-] GB [_-] oed)
        | (i [_-] (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu ))
        | (no [_-] (?: bok | nyn ))
        | (sgn [_-] (?: BE [_-] (?: fr | nl) | CH [_-] de ))
        | (zh [_-] min [_-] nan)
        )';

    # Variants and extensions may repeat.
    my $variantList   = $variant   . '(?:' . $s . $variant   . ')*';
    my $extensionList = $extension . '(?:' . $s . $extension . ')*';

    my $langtag = "
        ($language)
        ($s ( $script ) )?
        ($s ( $region ) )?
        ($s ( $variantList ) )?
        ($s ( $extensionList ) )?
        ($s ( $privateUse ) )?
        ";

    my $r = ($value_to_test =~
        /^(
            ($langtag)
        | ($privateUse)
        | ($grandfathered)
        )$/xi);
    return $r;
}
2742              
sub _expand_curie
# Expands a token via __expand_curie, then gives the optional 'ontoken'
# callback the chance to replace the result.
#
# The callback receives the parser, $args{element}, the original token
# and the expansion; whatever it returns becomes the result. Without a
# callback the expansion itself is returned.
{
    my ($self, $token, %args) = @_;

    my $expanded = $self->__expand_curie($token, %args);

    my $ontoken = $self->{'sub'}->{'ontoken'};
    return $expanded unless defined $ontoken;

    return $ontoken->($self, $args{element}, $token, $expanded);
}
2755              
sub __expand_curie
# Turns one attribute token into a URI or blank node identifier.
#
# Recognised %args, as read below: attribute, element, prefixes, terms,
# xml_base, allow_unsafe_default_vocab, allow_unsafe_term, allow_relative.
#
# The token is tried against the following interpretations, in order;
# the first one that yields a value wins:
#   1. blank node ("_:x" or "[_:x]")
#   2. bare name in the default vocabulary
#   3. bare name that is a known term
#   4. prefixed CURIE
#   5. bare prefix (only with the 'prefix_bare' option)
#   6. absolute URI
#   7. relative URI
# Returns undef (after logging a warning in most cases) when nothing fits.
{
    my ($self, $token, %args) = @_;

    # Attribute tests, wrapped so that they are only evaluated when the
    # surrounding condition actually needs them.
    my $attr_takes_curies = sub
        { $args{'attribute'} =~ /^(rel|rev|property|typeof|datatype|role)$/i };
    my $attr_takes_uris = sub
        { $args{'attribute'} =~ /^(about|resource|graph)$/i };

    # Blank nodes
    {
        my $label;
        if ($token eq '_:' or $token eq '[_:]')
        {
            $label = '';
        }
        elsif ($token =~ /^_:(.+)$/i or $token =~ /^\[_:(.+)\]$/i)
        {
            $label = $1;
        }

        my $bnode;
        $bnode = $self->bnode(undef, undef, '_:' . $label)
            if defined $label;

        if (defined $bnode)
        {
            return $bnode
                unless $args{'attribute'} =~ /^(rel|rev|property|datatype)$/i;

            $self->_log_error(
                ERR_ERROR,
                ERR_CODE_BNODE_WRONGPLACE,
                "Blank node found in $args{attribute} where URIs are expected as values.",
                token     => $token,
                element   => $args{element},
                attribute => $args{attribute},
            );

            if ($token =~ /^\[_:(.+)\]$/i)
            {
                return $1;
            }
            return $token;
        }
    }

    # Safe CURIEs are written in square brackets; strip them and remember.
    my $is_safe = 0;
    if (my ($unbracketed) = $token =~ /^\[(.*)\]$/)
    {
        $is_safe = 1;
        $token   = $unbracketed;
    }

    my $is_bare_name = ($token =~ /^($XML::RegExp::NCName)$/);

    # CURIEs - default vocab
    if ($is_bare_name
    and ($is_safe || $attr_takes_curies->() || $args{'allow_unsafe_default_vocab'}))
    {
        return 'http://www.w3.org/1999/xhtml/vocab#' . $token
            if $args{'attribute'} eq 'role';

        return $args{'prefixes'}{'(VOCAB)'} . $token
            if defined $args{'prefixes'}{'(VOCAB)'};

        return undef if $is_safe;
    }

    # Keywords / terms / whatever-they're-called
    if ($is_bare_name
    and ($is_safe || $attr_takes_curies->() || $args{'allow_unsafe_term'}))
    {
        my $terms = $args{'terms'};
        my $attr  = $args{'attribute'};

        # Case-sensitive tables first, attribute-specific before generic.
        my @lookups = (
            [ 'sensitive',   $attr, $token    ],
            [ 'sensitive',   '*',   $token    ],
            [ 'insensitive', $attr, lc $token ],
            [ 'insensitive', '*',   lc $token ],
        );
        foreach my $lookup (@lookups)
        {
            my ($table, $scope, $key) = @$lookup;
            my $meaning = $terms->{$table}{$scope}{$key};
            return $meaning if defined $meaning;
        }
    }

    # CURIEs - prefixed
    my @curie_parts = ($token =~ /^($XML::RegExp::NCName)?:(\S*)$/);
    if (@curie_parts
    and (
        $is_safe
        or $attr_takes_curies->()
        or $self->{options}{safe_optional}
    ))
    {
        my ($given_prefix, $suffix) = @curie_parts;
        my $prefix = (defined $given_prefix && length $given_prefix)
            ? $given_prefix
            : '(DEFAULT PREFIX)';

        if (defined $args{'prefixes'}{'(DEFAULT PREFIX)'} && $prefix eq '(DEFAULT PREFIX)')
        {
            return $args{'prefixes'}{'(DEFAULT PREFIX)'} . $suffix;
        }
        if (defined $args{'prefixes'}{'sensitive'}{$prefix})
        {
            return $args{'prefixes'}{'sensitive'}{$prefix} . $suffix;
        }
        if (defined $args{'prefixes'}{'insensitive'}{lc $prefix})
        {
            return $args{'prefixes'}{'insensitive'}{lc $prefix} . $suffix;
        }

        if ($is_safe)
        {
            $prefix = '' if $prefix eq '(DEFAULT PREFIX)';
            $self->_log_error(
                ERR_WARNING,
                ERR_CODE_CURIE_UNDEFINED,
                "CURIE '$token' used in safe CURIE, but '$prefix' is undefined.",
                token     => $token,
                element   => $args{element},
                attribute => $args{attribute},
                prefix    => $prefix,
            );
            return undef;
        }
    }

    # CURIEs - bare prefixes
    if ($self->{options}{prefix_bare}
    and $is_bare_name
    and (
        $is_safe
        or $attr_takes_curies->()
        or $self->{options}{safe_optional}
    ))
    {
        my $prefix = $token;

        if (defined $args{'prefixes'}{'sensitive'}{$prefix})
        {
            return $args{'prefixes'}{'sensitive'}{$prefix} . '';
        }
        if (defined $args{'prefixes'}{'insensitive'}{lc $prefix})
        {
            return $args{'prefixes'}{'insensitive'}{lc $prefix} . '';
        }
    }

    # Absolute URIs
    if ($token =~ /^[A-Z][A-Z0-9\.\+-]*:/i and !$is_safe
    and ($self->{'options'}{'full_uris'} || $attr_takes_uris->()))
    {
        return $token;
    }

    # Relative URIs
    if (!$is_safe and ($attr_takes_uris->() || $args{'allow_relative'}))
    {
        return $self->uri($token, {'element'=>$args{'element'}, 'xml_base'=>$args{'xml_base'}});
    }

    $self->_log_error(
        ERR_WARNING,
        ERR_CODE_CURIE_FELLTHROUGH,
        "Couldn't make sense of token '$token'.",
        token     => $token,
        element   => $args{element},
        attribute => $args{attribute},
    );

    return undef;
}
2908              
2909             __PACKAGE__
2910             __END__
2911              
2912             =head1 NAME
2913              
2914             RDF::RDFa::Parser - flexible RDFa parser
2915              
2916             =head1 SYNOPSIS
2917              
2918             If you're wanting to work with an RDF::Trine::Model that can be queried with SPARQL, etc:
2919              
2920             use RDF::RDFa::Parser;
2921             my $url = 'http://example.com/document.html';
2922             my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1');
2923             my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options);
2924             my $model = $rdfa->graph;
2925              
2926             For dealing with local data:
2927              
2928             use RDF::RDFa::Parser;
2929             my $base_url = 'http://example.com/document.html';
2930             my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1');
2931             my $rdfa = RDF::RDFa::Parser->new($markup, $base_url, $options);
2932             my $model = $rdfa->graph;
2933              
2934             A simple set of operations for working with Open Graph Protocol data:
2935              
2936             use RDF::RDFa::Parser;
2937             my $url = 'http://www.rottentomatoes.com/m/net/';
2938             my $options = RDF::RDFa::Parser::Config->tagsoup;
2939             my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options);
2940             print $rdfa->opengraph('title') . "\n";
2941             print $rdfa->opengraph('image') . "\n";
2942              
2943             =head1 DESCRIPTION
2944              
2945             L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module.
2946             If you are new to parsing RDFa with Perl, then that's the best place to
2947             start.
2948              
2949             =head2 Forthcoming API Changes
2950              
2951             Some of the logic regarding host language and RDFa version guessing
2952             is likely to be removed from RDF::RDFa::Parser and
2953             RDF::RDFa::Parser::Config, and shifted into RDF::TrineX::Parser::RDFa
2954             instead.
2955              
2956             =head2 Constructors
2957              
2958             =over 4
2959              
2960             =item C<< $p = RDF::RDFa::Parser->new($markup, $base, [$config], [$storage]) >>
2961              
2962             This method creates a new RDF::RDFa::Parser object and returns it.
2963              
2964             The $markup variable may contain an XHTML/XML string, or an
2965             XML::LibXML::Document. If a string, the document is parsed using
2966             XML::LibXML::Parser or HTML::HTML5::Parser, depending on the
2967             configuration in $config. XML well-formedness errors will cause the
2968             function to die.
2969              
2970             $base is a URL used to resolve relative links found in the document.
2971              
2972             $config optionally holds an RDF::RDFa::Parser::Config object which
2973             determines the set of rules used to parse the RDFa. It defaults to
2974             XHTML+RDFa 1.1.
2975              
2976             B<Advanced usage note:> $storage optionally holds an RDF::Trine::Store
2977             object. If undef, then a new temporary store is created.
2978              
2979             =item C<< $p = RDF::RDFa::Parser->new_from_url($url, [$config], [$storage]) >>
2980              
2981             =item C<< $p = RDF::RDFa::Parser->new_from_uri($url, [$config], [$storage]) >>
2982              
2983             $url is a URL to fetch and parse, or an HTTP::Response object.
2984              
2985             $config optionally holds an RDF::RDFa::Parser::Config object which
2986             determines the set of rules used to parse the RDFa. The default is
2987             to determine the configuration by looking at the HTTP response
2988             Content-Type header; it's probably sensible to keep the default.
2989              
2990             $storage optionally holds an RDF::Trine::Store object. If undef, then
2991             a new temporary store is created.
2992              
2993             This function can also be called as C<new_from_url> or C<new_from_uri>.
2994             Same thing.
2995              
2996             =item C<< $p = RDF::RDFa::Parser->new_from_response($response, [$config], [$storage]) >>
2997              
2998             $response is an C<HTTP::Response> object.
2999              
3000             Otherwise the same as C<new_from_url>.
3001              
3002             =back
3003              
3004             =head2 Public Methods
3005              
3006             =over 4
3007              
3008             =item C<< $p->graph >>
3009              
3010             This will return an RDF::Trine::Model containing all the RDFa
3011             data found on the page.
3012              
3013             B<Advanced usage note:> If passed a graph URI as a parameter,
3014             will return a single named graph from within the page. This
3015             feature is only useful if you're using named graphs.
3016              
3017             =item C<< $p->graphs >>
3018              
3019             B<Advanced usage only.>
3020              
3021             Will return a hashref of all named graphs, where the graph name is a
3022             key and the value is an RDF::Trine::Model tied to temporary storage.
3023              
3024             This method is only useful if you're using named graphs.
3025              
3026             =item C<< $p->opengraph([$property]) >>
3027              
3028             If $property is provided, will return the value or list of values (if
3029             called in list context) for that Open Graph Protocol property. (In pure
3030             RDF terms, it returns the non-bnode objects of triples where the
3031             subject is the document base URI; and the predicate is $property,
3032             with non-URI $property strings taken as having the implicit prefix
3033             'http://ogp.me/ns#'. There is no distinction between literal and
3034             non-literal values; literal datatypes and languages are dropped.)
3035              
3036             If $property is omitted, returns a list of possible properties.
3037              
3038             Example:
3039              
3040             foreach my $property (sort $p->opengraph)
3041             {
3042             print "$property :\n";
3043             foreach my $val (sort $p->opengraph($property))
3044             {
3045             print " * $val\n";
3046             }
3047             }
3048              
3049             See also: L<http://opengraphprotocol.org/>.
3050              
3051             =item C<< $p->dom >>
3052              
3053             Returns the parsed XML::LibXML::Document.
3054              
3055             =item C<< $p->uri( [$other_uri] ) >>
3056              
3057             Returns the base URI of the document being parsed. This will usually be the
3058             same as the base URI provided to the constructor, but may differ if the
3059             document contains a <base> HTML element.
3060              
3061             Optionally it may be passed a parameter - an absolute or relative URI - in
3062             which case it returns the same URI which it was passed as a parameter, but
3063             as an absolute URI, resolved relative to the document's base URI.
3064              
3065             This seems like two unrelated functions, but if you consider the consequence
3066             of passing a relative URI consisting of a zero-length string, it in fact makes
3067             sense.
3068              
3069             =item C<< $p->errors >>
3070              
3071             Returns a list of errors and warnings that occurred during parsing.
3072              
3073             =item C<< $p->processor_graph >>
3074              
3075             As per C<< $p->errors >> but returns data as an RDF model.
3076              
3077             =item C<< $p->output_graph >>
3078              
3079             An alias for C<graph>, but does not accept a parameter.
3080              
3081             =item C<< $p->processor_and_output_graph >>
3082              
3083             Union of the above two graphs.
3084              
3085             =item C<< $p->consume >>
3086              
3087             B<Advanced usage only.>
3088              
3089             The document is parsed for RDFa. As of RDF::RDFa::Parser 1.09x,
3090             this is called automatically when needed; you probably don't need
3091             to touch it unless you're doing interesting things with callbacks.
3092              
3093             Calling C<< $p->consume(survive => 1) >> will avoid crashing (e.g.
3094             when the markup provided cannot be parsed), and instead make more
3095             errors available in C<< $p->errors >>.
3096              
3097             =item C<< $p->set_callbacks(\%callbacks) >>
3098              
3099             B<Advanced usage only.>
3100              
3101             Set callback functions for the parser to call on certain events. These are only necessary if
3102             you want to do something especially unusual.
3103              
3104             $p->set_callbacks({
3105             'pretriple_resource' => sub { ... } ,
3106             'pretriple_literal' => sub { ... } ,
3107             'ontriple' => undef ,
3108             'onprefix' => \&some_function ,
3109             });
3110              
3111             Either of the two pretriple callbacks can be set to the string 'print' instead of a coderef.
3112             This enables built-in callbacks for printing Turtle to STDOUT.
3113              
3114             For details of the callback functions, see the section CALLBACKS. If used, C<set_callbacks>
3115             must be called I<before> C<consume>. C<set_callbacks> returns a reference to the parser
3116             object itself.
3117              
3118             =item C<< $p->element_subjects >>
3119              
3120             B<Advanced usage only.>
3121              
3122             Gets/sets a hashref of { xpath => RDF::Trine::Node } mappings.
3123              
3124             This is not touched during normal RDFa parsing, only being used by the @role and
3125             @cite features where RDF resources (i.e. URIs and blank nodes) are needed to
3126             represent XML elements themselves.
3127              
3128             =back
3129              
3130             =head1 CALLBACKS
3131              
3132             Several callback functions are provided. These may be set using the C<set_callbacks> function,
3133             which takes a hashref of keys pointing to coderefs. The keys are named for the event to fire the
3134             callback on.
3135              
3136             =head2 ontriple
3137              
3138             This is called once a triple is ready to be added to the graph. (After the pretriple
3139             callbacks.) The parameters passed to the callback function are:
3140              
3141             =over 4
3142              
3143             =item * A reference to the C<RDF::RDFa::Parser> object
3144              
3145             =item * A hashref of relevant C<XML::LibXML::Element> objects (subject, predicate, object, graph, current)
3146              
3147             =item * An RDF::Trine::Statement object.
3148              
3149             =back
3150              
3151             The callback should return 1 to tell the parser to skip this triple (not add it to
3152             the graph); return 0 otherwise. The callback may modify the RDF::Trine::Statement
3153             object.
3154              
3155             =head2 onprefix
3156              
3157             This is called when a new CURIE prefix is discovered. The parameters passed
3158             to the callback function are:
3159              
3160             =over 4
3161              
3162             =item * A reference to the C<RDF::RDFa::Parser> object
3163              
3164             =item * A reference to the C<XML::LibXML::Element> being parsed
3165              
3166             =item * The prefix (string, e.g. "foaf")
3167              
3168             =item * The expanded URI (string, e.g. "http://xmlns.com/foaf/0.1/")
3169              
3170             =back
3171              
3172             The return value of this callback is currently ignored, but you should return
3173             0 in case future versions of this module assign significance to the return value.
3174              
3175             =head2 ontoken
3176              
3177             This is called when a CURIE or term has been expanded. The parameters are:
3178              
3179             =over 4
3180              
3181             =item * A reference to the C<RDF::RDFa::Parser> object
3182              
3183             =item * A reference to the C<XML::LibXML::Element> being parsed
3184              
3185             =item * The CURIE or token as a string (e.g. "foaf:name" or "Stylesheet")
3186              
3187             =item * The fully expanded URI
3188              
3189             =back
3190              
3191             The callback function must return a fully expanded URI, or if it
3192             wants the CURIE to be ignored, undef.
3193              
3194             =head2 onerror
3195              
3196             This is called when an error occurs. The parameters passed to the callback function are:
3197              
3198             =over 4
3199              
3200             =item * A reference to the C<RDF::RDFa::Parser> object
3201              
3202             =item * The error level (RDF::RDFa::Parser::ERR_ERROR or
3203             RDF::RDFa::Parser::ERR_WARNING)
3204              
3205             =item * An error code
3206              
3207             =item * An error message
3208              
3209             =item * A hash of other information
3210              
3211             =back
3212              
3213             The return value of this callback is currently ignored, but you should return
3214             0 in case future versions of this module assign significance to the return value.
3215              
3216             If you do not define an onerror callback, then errors will be output via STDERR
3217             and warnings will be silent. Either way, you can retrieve errors after parsing
3218             using the C<errors> method.
3219              
3220             =head2 pretriple_resource
3221              
3222             B<This callback is deprecated - use ontriple instead.>
3223              
3224             This is called when a triple has been found, but before preparing the triple for
3225             adding to the model. It is only called for triples with a non-literal object value.
3226              
3227             The parameters passed to the callback function are:
3228              
3229             =over 4
3230              
3231             =item * A reference to the C<RDF::RDFa::Parser> object
3232              
3233             =item * A reference to the C<XML::LibXML::Element> being parsed
3234              
3235             =item * Subject URI or bnode (string)
3236              
3237             =item * Predicate URI (string)
3238              
3239             =item * Object URI or bnode (string)
3240              
3241             =item * Graph URI or bnode (string or undef)
3242              
3243             =back
3244              
3245             The callback should return 1 to tell the parser to skip this triple (not add it to
3246             the graph); return 0 otherwise.
3247              
3248             =head2 pretriple_literal
3249              
3250             B<This callback is deprecated - use ontriple instead.>
3251              
3252             This is the equivalent of pretriple_resource, but is only called for triples with a
3253             literal object value.
3254              
3255             The parameters passed to the callback function are:
3256              
3257             =over 4
3258              
3259             =item * A reference to the C<RDF::RDFa::Parser> object
3260              
3261             =item * A reference to the C<XML::LibXML::Element> being parsed
3262              
3263             =item * Subject URI or bnode (string)
3264              
3265             =item * Predicate URI (string)
3266              
3267             =item * Object literal (string)
3268              
3269             =item * Datatype URI (string or undef)
3270              
3271             =item * Language (string or undef)
3272              
3273             =item * Graph URI or bnode (string or undef)
3274              
3275             =back
3276              
3277             Beware: sometimes both a datatype I<and> a language will be passed.
3278             This goes beyond the normal RDF data model.
3279              
3280             The callback should return 1 to tell the parser to skip this triple (not add it to
3281             the graph); return 0 otherwise.
3282              
3283             =head1 FEATURES
3284              
3285             Most features are configurable using L<RDF::RDFa::Parser::Config>.
3286              
3287             =head2 RDFa Versions
3288              
3289             RDF::RDFa::Parser supports RDFa versions 1.0 and 1.1.
3290              
3291             1.1 is currently a moving target; support is experimental.
3292              
3293             1.1 is the default, but this can be configured using RDF::RDFa::Parser::Config.
3294              
3295             =head2 Host Languages
3296              
3297             RDF::RDFa::Parser supports various different RDFa host languages:
3298              
3299             =over 4
3300              
3301             =item * B<XHTML>
3302              
3303             As per the XHTML+RDFa 1.0 and XHTML+RDFa 1.1 specifications.
3304              
3305             =item * B<HTML 4>
3306              
3307             Uses an HTML5 (sic) parser; uses @lang instead of @xml:lang; keeps prefixes
3308             and terms case-insensitive; recognises the @rel relations defined in the HTML
3309             4 specification. Otherwise the same as XHTML.
3310              
3311             =item * B<HTML5>
3312              
3313             Uses an HTML5 parser; uses @lang as well as @xml:lang; keeps prefixes
3314             and terms case-insensitive; recognises the @rel relations defined in the HTML5
3315             draft specification. Otherwise the same as XHTML.
3316              
3317             =item * B<XML>
3318              
3319             This is implemented as per the RDFa Core 1.1 specification. There is also
3320             support for "RDFa Core 1.0", for which no specification exists, but has been
3321             reverse-engineered by applying the differences between XHTML+RDFa 1.1 and
3322             RDFa Core 1.1 to the XHTML+RDFa 1.0 specification.
3323              
3324             Embedded chunks of RDF/XML within XML are supported.
3325              
3326             =item * B<SVG>
3327              
3328             For now, a synonym for XML.
3329              
3330             =item * B<Atom>
3331              
3332             The E<lt>feedE<gt> and E<lt>entryE<gt> elements are treated specially, setting
3333             a new subject; IANA-registered rel keywords are recognised.
3334              
3335             By passing C<< atom_parser=>1 >> as a Config option, you can also handle
3336             Atom's native semantics. (Uses L<XML::Atom::OWL>. If this module is not installed,
3337             this option is silently ignored.)
3338              
3339             Otherwise, the same as XML.
3340              
3341             =item * B<DataRSS>
3342              
3343             Defines some default prefixes. Otherwise, the same as Atom.
3344              
3345             =item * B<OpenDocument XML>
3346              
3347             That is, XML content formatted along the lines of 'content.xml' in OpenDocument
3348             files.
3349              
3350             Supports OpenDocument bookmarked ranges used as typed or plain object literals
3351             (though not XML literals); expects RDFa attributes in the XHTML namespace
3352             instead of in no namespace. Otherwise, the same as XML.
3353              
3354             =item * B<OpenDocument>
3355              
3356             That is, a ZIP file containing OpenDocument XML files. RDF::RDFa::Parser
3357             will do all the unzipping and combining for you, so you don't have to.
3358             The unregistered "jar:" URI scheme is used to refer to files within the ZIP.
3359              
3360             =back
3361              
3362             =head2 Embedded RDF/XML
3363              
3364             Though a rarely used feature, XHTML allows other XML markup languages
3365             to be directly embedded into it. In particular, chunks of RDF/XML can
3366             be included in XHTML. While this is not common in XHTML, it's seen quite
3367             often in SVG and other XML markup languages.
3368              
3369             When RDF::RDFa::Parser encounters a chunk of RDF/XML in a document
3370             it's parsing (i.e. an element called 'RDF' with namespace
3371             'http://www.w3.org/1999/02/22-rdf-syntax-ns#'), there are three different
3372             courses of action it can take:
3373              
3374             =over 4
3375              
3376             =item 0. Continue straight through it.
3377              
3378             This is the behaviour that XHTML+RDFa seems to suggest is the right
3379             option. It should mostly not do any harm: triples encoded in RDF/XML
3380             will be generally ignored (though the chunk itself could theoretically
3381             end up as part of an XML literal). It will waste a bit of time though.
3382              
3383             =item 1. Parse the RDF/XML.
3384              
3385             The parser will parse the RDF/XML properly. If named graphs are
3386             enabled, any triples will be added to a separate graph. This is
3387             the behaviour that SVG Tiny 1.2 seems to suggest is the correct
3388             thing to do.
3389              
3390             =item 2. Skip the chunk.
3391              
3392             This will skip over the RDF element entirely, and thus save you a
3393             bit of time.
3394              
3395             =back
3396              
3397             You can decide which path to take by setting the 'embedded_rdfxml'
3398             Config option. For HTML and XHTML, you probably want
3399             to set embedded_rdfxml to '0' (the default) or '2' (a little faster).
3400             For other XML markup languages (e.g. SVG or Atom), you probably want to
3401             set it to '1'.
3402              
3403             (There's also an option '3' which controls how embedded RDF/XML interacts
3404             with named graphs, but this is only really intended for internal use, parsing
3405             OpenDocument.)
3406              
3407             =head2 Named Graphs
3408              
3409             The parser has support for named graphs within a single RDFa
3410             document. To switch this on, use the 'graph' Config option.
3411              
3412             See also L<http://buzzword.org.uk/2009/rdfa4/spec>.
3413              
3414             The name of the attribute which indicates graph URIs is by
3415             default 'graph', but can be changed using the 'graph_attr'
3416             Config option. This option accepts Clark Notation to specify a
3417             namespaced attribute. By default, the attribute value is
3418             interpreted like the 'about' attribute (i.e. CURIEs, URIs, etc),
3419             but if you set the 'graph_type' Config option to 'id',
3420             it will be treated as setting a fragment identifier (like the 'id'
3421             attribute).
3422              
3423             The 'graph_default' Config option allows you to set the default
3424             graph URI/bnode identifier.
3425              
3426             Once you're using named graphs, the C<graphs> method becomes
3427             useful: it returns a hashref of { graph_uri => trine_model } pairs.
3428             The optional parameter to the C<graph> method also becomes useful.
3429              
3430             OpenDocument (ZIP) host language support makes internal use
3431             of named graphs, so if you're parsing OpenDocument, tinker with
3432             the graph Config options at your own risk!
3433              
3434             =head2 Auto Config
3435              
3436             RDF::RDFa::Parser has a lot of different Config options to play with. Sometimes it
3437             might be useful to allow the page being parsed to control some of these options.
3438             If you switch on the 'auto_config' Config option, pages can do this.
3439              
3440             A page can set options using a specially crafted E<lt>metaE<gt> tag:
3441              
3442             <meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config"
3443             content="xhtml_lang=1&amp;xml_lang=0" />
3444              
3445             Note that the C<content> attribute is an application/x-www-form-urlencoded
3446             string (which must then be HTML-escaped of course). Semicolons may be used
3447             instead of ampersands, as these tend to look nicer:
3448              
3449             <meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config"
3450             content="xhtml_lang=1;xml_lang=0" />
3451              
3452             It's possible to use auto config outside XHTML (e.g. in Atom or
3453             SVG) using namespaces:
3454              
3455             <xhtml:meta xmlns:xhtml="http://www.w3.org/1999/xhtml"
3456             name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config"
3457             content="xhtml_lang=0;xml_base=2;atom_elements=1" />
3458              
3459             Any Config option may be given using auto config, except 'use_rtnlx', 'dom_parser',
3460             and of course 'auto_config' itself.
3461              
3462             =head2 Profiles
3463              
3464             Support for Profiles (an experimental RDFa 1.1 feature) was added in
3465             version 1.09_00, but dropped after version 1.096, because the feature
3466             was removed from draft specs.
3467              
3468             =head1 BUGS
3469              
3470             RDF::RDFa::Parser 0.21 passed all approved tests in the XHTML+RDFa
3471             test suite at the time of its release.
3472              
3473             RDF::RDFa::Parser 0.22 (used in conjunction with HTML::HTML5::Parser
3474             0.01 and HTML::HTML5::Sanity 0.01) additionally passes all approved
3475             tests in the HTML4+RDFa and HTML5+RDFa test suites at the time of
3476             its release; except test cases 0113 and 0121, which the author of
3477             this module believes mandate incorrect HTML parsing.
3478              
3479             RDF::RDFa::Parser 1.096_01 passes all approved tests on the default
3480             graph (not the processor graph) in the RDFa 1.1 test suite for language
3481             versions 1.0 and host languages xhtml1, html4 and html5, with the
3482             following exceptions which are skipped:
3483              
3484             =over
3485              
3486             =item * B<0140> - wilful violation, pending proof that the test is backed up by the spec.
3487              
3488             =item * B<0198> - an XML canonicalisation test that may be dropped in the future.
3489              
3490             =item * B<0212> - wilful violation, as passing this test would require regressing on the old RDFa 1.0 test suite.
3491              
3492             =item * B<0251> to B<0256> pass with RDFa 1.1 and are skipped in RDFa 1.0 because they use RDFa-1.1-specific syntax.
3493              
3494             =item * B<0256> is additionally skipped in HTML4 mode, as the author believes xml:lang should be ignored in HTML versions prior to HTML5.
3495              
3496             =item * B<0303> - wilful violation, as this feature is simply awful.
3497            
3498             =back
3499              
3500             Please report any bugs to L<http://rt.cpan.org/>.
3501              
3502             Common gotchas:
3503              
3504             =over 8
3505              
3506             =item * Are you using the XML catalogue?
3507              
3508             RDF::RDFa::Parser maintains a locally cached version of the XHTML+RDFa
3509             DTD. This will normally be within your Perl module directory, in a subdirectory
3510             named "auto/share/dist/RDF-RDFa-Parser/catalogue/".
3511             If this is missing, the parser should still work, but will be very slow.
3512              
3513             =back
3514              
3515             =head1 SEE ALSO
3516              
3517             L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module.
3518              
3519             L<RDF::RDFa::Parser::Config>.
3520              
3521             L<XML::LibXML>, L<RDF::Trine>, L<HTML::HTML5::Parser>, L<HTML::HTML5::Sanity>,
3522             L<RDF::RDFa::Generator>, L<RDF::RDFa::Linter>.
3523              
3524             L<http://www.perlrdf.org/>, L<http://rdfa.info>.
3525              
3526             =head1 AUTHOR
3527              
3528             Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
3529              
3530             =head1 ACKNOWLEDGEMENTS
3531              
3532             Kjetil Kjernsmo E<lt>kjetilk@cpan.orgE<gt> wrote much of the stuff for
3533             building RDF::Trine models. Neubert Joachim taught me to use XML
3534             catalogues, which massively speeds up parsing of XHTML files that have
3535             DTDs.
3536              
3537             =head1 COPYRIGHT AND LICENCE
3538              
3539             Copyright 2008-2012 Toby Inkster
3540              
3541             This is free software; you can redistribute it and/or modify it under
3542             the same terms as the Perl 5 programming language system itself.
3543              
3544             =head1 DISCLAIMER OF WARRANTIES
3545              
3546             THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
3547             WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
3548             MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.