File Coverage

blib/lib/LaTeXML/Post.pm

Criterion	Covered	Total	%
statement	21	21	100.0
branch			n/a
condition			n/a
subroutine	7	7	100.0
pod			n/a
total	28	28	100.0

line	stmt	sub	time	code
1				# /=====================================================================\ #
2				# \| LaTeXML::Post \| #
3				# \| PostProcessing driver \| #
4				# \|=====================================================================\| #
5				# \| Part of LaTeXML: \| #
6				# \| Public domain software, produced as part of work done by the \| #
7				# \| United States Government & not subject to copyright in the US. \| #
8				# \|---------------------------------------------------------------------\| #
9				# \| Bruce Miller #_# \| #
10				# \| http://dlmf.nist.gov/LaTeXML/ (o o) \| #
11				# \=========================================================ooo==U==ooo=/ #
12
13				package LaTeXML::Post;
14	1	1	4865	use strict;
	1		1
	1		26
15	1	1	3	use warnings;
	1		1
	1		24
16	1	1	505	use Time::HiRes;
	1		1095
	1		3
17	1	1	423	use LaTeXML::Util::Radix;
	1		2
	1		112
18	1	1	472	use Encode;
	1		10901
	1		108
19	1	1	9	use base qw(Exporter);
	1		2
	1		104
20	1	1	7	use base qw(LaTeXML::Common::Object);
	1		2
	1		550
21				use LaTeXML::Global;
22				use LaTeXML::Common::Error;
23				use LaTeXML::Core::State;
24				our @EXPORT = (@LaTeXML::Common::Error::EXPORT);
25
26				sub new {
27				my ($class, %options) = @_;
28				my $self = bless { status => {}, %options }, $class;
29				$$self{verbosity} = 0 unless defined $$self{verbosity};
30				# TEMPORARY HACK!!!!
31				# Create a State object, essentially only to hold verbosity (for now)
32				# so that Errors can be reported, managed and recorded
33				# Eventually will be a "real" State (or other configuration object)
34				$$self{state} = LaTeXML::Core::State->new();
35				$$self{state}->assignValue(VERBOSITY => $$self{verbosity});
36				return $self; }
37
38				#======================================================================
39				sub ProcessChain {
40				my ($self, $doc, @postprocessors) = @_;
41				return $self->withState(sub {
42				return $self->ProcessChain_internal($doc, @postprocessors); }); }
43
44				sub ProcessChain_internal {
45				my ($self, $doc, @postprocessors) = @_;
46				local $LaTeXML::POST = $self;
47				local $LaTeXML::Post::NOTEINFO = undef;
48				local $LaTeXML::Post::DOCUMENT = $doc;
49
50				my @docs = ($doc);
51				NoteBegin("post-processing");
52
53				foreach my $processor (@postprocessors) {
54				local $LaTeXML::Post::PROCESSOR = $processor;
55				my @newdocs = ();
56				foreach my $doc (@docs) {
57				local $LaTeXML::Post::DOCUMENT = $doc;
58				if (my @nodes = grep { $_ } $processor->toProcess($doc)) { # If there are nodes to process
59				my $n = scalar(@nodes);
60				my $msg = join(' ', $processor->getName \|\| '',
61				$doc->siteRelativeDestination \|\| '',
62				($n > 1 ? "$n to process" : 'processing'));
63				NoteBegin($msg);
64				push(@newdocs, $processor->process($doc, @nodes));
65				NoteEnd($msg); }
66				else {
67				push(@newdocs, $doc); } }
68				@docs = @newdocs; }
69				NoteEnd("post-processing");
70				return @docs; }
71
72				## HACK!!!
73				## This is a copy of withState from LaTeXML::Core.pm
74				## This should eventually be in a higher level, common class
75				## using a common State or configuration object
76				## in order to wrap ALL processing.
77				sub withState {
78				my ($self, $closure) = @_;
79				local $STATE = $$self{state};
80				# And, set fancy error handler for ANY die!
81				local $SIG{__DIE__} = \&LaTeXML::Common::Error::perl_die_handler;
82				local $SIG{INT} = \&LaTeXML::Common::Error::perl_interrupt_handler;
83				local $SIG{__WARN__} = \&LaTeXML::Common::Error::perl_warn_handler;
84				local $SIG{'ALRM'} = \&LaTeXML::Common::Error::perl_timeout_handler;
85				local $SIG{'TERM'} = \&LaTeXML::Common::Error::perl_terminate_handler;
86
87				local $LaTeXML::DUAL_BRANCH = '';
88
89				return &$closure($STATE); }
90
91				sub getStatusCode {
92				my ($self) = @_;
93				return $$self{state}->getStatusCode; }
94
95				sub getStatusMessage {
96				my ($self) = @_;
97				return $$self{state}->getStatusMessage; }
98
99				#======================================================================
100				# "Global" Post processing services
101				#======================================================================
102
103				# Return a sorter appropriate for lang (if Unicode::Collate::Locale available),
104				# or an undifferentiated Unicode sorter (if only Unicode::Collate is available),
105				# or just a dumb stand-in for perl's sort
106				sub getsorter {
107				my ($self, $lang) = @_;
108				my $collator;
109				if ($collator = $$self{collatorcache}{$lang}) { }
110				elsif ($collator = eval {
111				local $LaTeXML::IGNORE_ERRORS = 1;
112				require 'Unicode/Collate/Locale.pm';
113				Unicode::Collate::Locale->new(
114				locale => $lang,
115				variable => 'non-ignorable', # I think; at least space shouldn't be ignored
116				upper_before_lower => 1); }) { }
117				elsif ($collator = eval {
118				local $LaTeXML::IGNORE_ERRORS = 1;
119				require 'Unicode/Collate.pm';
120				Unicode::Collate->new(
121				variable => 'non-ignorable', # I think; at least space shouldn't be ignored
122				upper_before_lower => 1); }) {
123				Info('expected', 'Unicode::Collate::Locale', undef,
124				"No Unicode::Collate::Locale found;",
125				"using Unicode::Collate; ignoring language='$lang'"); }
126				else {
127				# Otherwise, just use primitive codepoint ordering.
128				$collator = LaTeXML::Post::DumbCollator->new();
129				Info('expected', 'Unicode::Collate::Locale', undef,
130				"No Unicode::Collate::Locale or Unicode::Collate",
131				"using perl's sort; ignoring language='$lang'"); }
132				$$self{collatorcache}{$lang} = $collator;
133				return $collator; }
134
135				#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
136				package LaTeXML::Post::DumbCollator;
137				use strict;
138
139				sub new {
140				my ($class) = @_;
141				return bless {}, $class; }
142
143				sub sort {
144				my ($self, @things) = @_;
145				return (sort @things); }
146
147				#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148				package LaTeXML::Post::Processor;
149				use strict;
150				use LaTeXML::Post;
151				use LaTeXML::Common::Error;
152				use LaTeXML::Common::XML;
153				use LaTeXML::Util::Pathname;
154				use base qw(LaTeXML::Common::Object);
155
156				# An Abstract Post Processor
157				sub new {
158				my ($class, %options) = @_;
159				my $self = bless {%options}, $class;
160				$$self{verbosity} = 0 unless defined $$self{verbosity};
161				$$self{resource_directory} = $options{resource_directory};
162				$$self{resource_prefix} = $options{resource_prefix};
163				my $name = $class; $name =~ s/^LaTeXML::Post:://;
164				$$self{name} = $name;
165				return $self; }
166
167				sub getName {
168				my ($self) = @_;
169				return $$self{name}; }
170
171				# Return the nodes to be processed; by default the document element.
172				# This allows processors to focus on specific kinds of nodes,
173				# or to skip processing if there are none to process.
174				sub toProcess {
175				my ($self, $doc) = @_;
176				return $doc->getDocumentElement; }
177
178				# This must be defined to do whatever processing is needed to @toprocess nodes.
179				sub process {
180				my ($self, $doc, @toprocess) = @_;
181				Fatal("misdefined", $self, $doc, "This post-processor is abstract; does not implement ->process");
182				return $doc; }
183
184				#======================================================================
185				# Some postprocessors will want to create a bunch of "resource"s,
186				# such as generated or transformed image files, or other data files.
187				# These should return a pathname, relative to the document's destination,
188				# for storing a resource associated with $node.
189				# Will use the Post option resource_directory
190				sub desiredResourcePathname {
191				my ($self, $doc, $node, $source, $type) = @_;
192				return; }
193
194				# Ideally this would return a pathname relative to the document
195				# but I think we've accommodated absolute ones.
196				sub generateResourcePathname {
197				my ($self, $doc, $node, $source, $type) = @_;
198				my $subdir = $$self{resource_directory} \|\| '';
199				my $prefix = $$self{resource_prefix} \|\| "x";
200				my $counter = join('_', "_max", $subdir, $prefix, "counter_");
201				my $n = $doc->cacheLookup($counter) \|\| 0;
202				my $name = $prefix . ++$n;
203				$doc->cacheStore($counter, $n);
204				return pathname_make(dir => $subdir, name => $name, type => $type); }
205
206				#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
207				package LaTeXML::Post::MathProcessor;
208				use strict;
209				use LaTeXML::Post;
210				use LaTeXML::Common::Error;
211				use base qw(LaTeXML::Post::Processor);
212				use LaTeXML::Common::XML;
213
214				# This is an abstract class; A complete MathProcessor will need to define:
215				# $self->convertNode($doc,$xmath)
216				# to generate the converted math node
217				# $self->combineParallel($doc,$math,$primary,@secondaries)
218				# to combine the $primary (the result of $self's conversion of $math)
219				# with the results of other math processors to create the
220				# parallel markup appropriate for this processor's markup.
221				# $self->rawIDSuffix returns a short string to append to id's for nodes
222				# using this markup.
223
224				# Top level processing finds and converts all top-level math nodes.
225				# Any nested mathnodes would only appear within ltx:XMText;
226				# postprocessors must handle nested math as appropriate (but see convertXMTextContent)
227				# Invokes preprocess on each before doing the conversion in case
228				# analysis is needed.
229				sub toProcess {
230				my ($self, $doc) = @_;
231				return $doc->findnodes('//ltx:Math[not(ancestor::ltx:Math)]'); }
232
233				sub process {
234				my ($self, $doc, @maths) = @_;
235				local $LaTeXML::Post::MATHPROCESSOR = $self;
236				$doc->markXMNodeVisibility;
237				$self->preprocess($doc, @maths);
238				if ($$self{parallel}) {
239				my @secondaries = @{ $$self{secondary_processors} };
240				# What's the right test for when cross-referencing should be done?
241				# For now: only when the primary and some secondary can cross-ref
242				# (otherwise, end up with peculiar structures?)
243				my ($proc1, @ignore) = grep { $_->can('addCrossref') } @secondaries;
244				if ($self->can('addCrossref') && $proc1) {
245				$$self{crossreferencing} = 1; # We'll need ID's!
246				$$proc1{crossreferencing} = 1; }
247				foreach my $proc (@secondaries) {
248				local $LaTeXML::Post::MATHPROCESSOR = $proc;
249				$proc->preprocess($doc, @maths); } }
250				# Re-Fetch the math nodes, in case preprocessing has messed them up!!!
251				@maths = $self->toProcess($doc);
252
253				## Do in reverse, since (in LaTeXML) we allow math nested within text within math.
254				## So, we want to converted any nested expressions first, so they get carried along
255				## with the outer ones.
256				my $n = 0;
257				foreach my $math (reverse(@maths)) {
258				# If parent is MathBranch, which branch number is it?
259				# (note: the MathBranch will be in a ltx:MathFork, with a ltx:Math being 1st child)
260				my @preceding = $doc->findnodes("parent::ltx:MathBranch/preceding-sibling::*", $math);
261				local $LaTeXML::Post::MathProcessor::FORK = scalar(@preceding);
262				$self->processNode($doc, $math);
263				$n++; }
264
265				# Experimentally, cross reference ??? (or clearer name?)
266				if ($$self{parallel}) {
267				# There could be various strategies when there are more than 2 parallel conversions,
268				# eg a cycle or something....
269				# Here, we simply take the first two processors that know how to addCrossref
270				# and connect their nodes to each other.
271				my ($proc1, $proc2, @ignore)
272				= grep { $_->can('addCrossref') } $self, @{ $$self{secondary_processors} };
273				if ($proc1 && $proc2) {
274				# First, prepare a list of all Math id's, in document order, to simplify crossreferencing
275				my $ids = {};
276				my $pos = 0;
277				foreach my $n ($doc->findnodes('descendant-or-self::ltx:Math/descendant::*[@xml:id]')) {
278				$$ids{ $n->getAttribute('xml:id') } = $pos++; }
279				$$proc1{crossreferencing_ids} = $ids;
280				$$proc2{crossreferencing_ids} = $ids;
281				# Now do cross referencing
282				$proc1->addCrossrefs($doc, $proc2);
283				$proc2->addCrossrefs($doc, $proc1); } }
284				NoteProgressDetailed(" [converted $n Maths]");
285				return $doc; }
286
287				# Make THIS MathProcessor the primary branch (of whatever parallel markup it supports),
288				# and make all of the @moreprocessors be secondary ones.
289				sub setParallel {
290				my ($self, @moreprocessors) = @_;
291				if (@moreprocessors) {
292				$$self{parallel} = 1;
293				map { $$_{is_secondary} = 1 } @moreprocessors; # Mark the others as secondary
294				$$self{secondary_processors} = [@moreprocessors];
295				$$self{name} .= '[w/' . join('+', map { $_->getName } @moreprocessors) . ']'; }
296				else {
297				$$self{parallel} = 0; }
298				return; }
299
300				# Optional; if you want to do anything before translation
301				sub preprocess {
302				my ($self, $doc, @nodes) = @_;
303				return; }
304
305				# $self->processNode($doc,$mathnode) is the top-level conversion
306				# It converts the XMath within $mathnode, and adds it to the $mathnode,
307				# This invokes $self->convertNode($doc,$xmath) to get the conversion.
308				sub processNode {
309				my ($self, $doc, $math) = @_;
310				my $xmath = $doc->findnode('ltx:XMath', $math);
311				return unless $xmath; # Nothing to convert if there's no XMath ... !
312				local $LaTeXML::Post::MATHPROCESSOR = $self;
313				my $conversion;
314				# XMath will be removed (LATER!), but mark its ids as reusable.
315				$doc->preremoveNodes($xmath);
316				if ($$self{parallel}) {
317				my $primary = $self->convertNode($doc, $xmath);
318				my @secondaries = ();
319				foreach my $proc (@{ $$self{secondary_processors} }) {
320				local $LaTeXML::Post::MATHPROCESSOR = $proc;
321				my $secondary = $proc->convertNode($doc, $xmath);
322				# IF it is (first) image, copy image attributes to ltx:Math ???
323				$self->maybeSetMathImage($math, $secondary);
324				push(@secondaries, $secondary); }
325				$conversion = $self->combineParallel($doc, $xmath, $primary, @secondaries); }
326				else {
327				$conversion = $self->convertNode($doc, $xmath);
328				$self->maybeSetMathImage($math, $conversion); }
329				# we now REMOVE the ltx:XMath from the ltx:Math, and whitespace
330				# (if there's an XMath PostProcessing module, it will add it back, with appropriate id's)
331				if (my $xml = $$conversion{xml}) {
332				$$conversion{xml} = $self->outerWrapper($doc, $xmath, $xml); }
333				$doc->removeNodes($xmath);
334				# NOTE: Unless XMath is the primary, (preserving the XMath, w/no IDSuffix)
335				# we've got to remove the id's from the XMath, since the primary will get same id's
336				# and (some versions) of libxml2 complain!
337				if ($$conversion{mimetype} && ($$conversion{mimetype} ne 'application/x-latexml')) {
338				map { $_->removeAttribute('xml:id') }
339				$doc->findnodes('descendant-or-self::*[@xml:id]', $xmath); }
340				$doc->removeBlankNodes($math);
341				if (my $new = $$conversion{xml}) {
342				$doc->addNodes($math, $new); }
343				# else ?
344				return; }
345
346				sub maybeSetMathImage {
347				my ($self, $math, $conversion) = @_;
348				if ((($$conversion{mimetype} \|\| '') =~ /^image\//) # Got an image?
349				&& !$math->getAttribute('imagesrc')) { # and it's the first one
350				if (my $src = $$conversion{src}) {
351				$math->setAttribute(imagesrc => $src);
352				$math->setAttribute(imagewidth => $$conversion{width});
353				$math->setAttribute(imageheight => $$conversion{height});
354				$math->setAttribute(imagedepth => $$conversion{depth}); } }
355				return; }
356
357				# NOTE: Sort out how parallel & outerWrapper should work.
358				# It probably ought to be that if the conversion is being embedded in
359				# something from another namespace, it needs the wrapper.
360				# ie. when mixing parallel markups, NOT just at the top level, although certainly there too.
361				#
362				# This should wrap the resulting conversion with m:math or om:OMA or whatever appropriate?
363				sub outerWrapper {
364				my ($self, $doc, $xmath, $conversion) = @_;
365				return $conversion; }
366
367				# This should proably be from the core of the current ->processNode
368				sub convertNode {
369				my ($self, $doc, $node) = @_;
370				Fatal('misdefined', (ref $self), undef,
371				"Abstract package: math conversion has not been defined for this MathProcessor");
372				return; }
373
374				# This should be implemented by potential Primaries
375				# Maybe the caller of this should check the namespaces, and call wrapper if needed?
376				sub combineParallel {
377				my ($self, $doc, $xmath, $primary, @secondaries) = @_;
378				LaTeXML::Post::Error('misdefined', (ref $self), undef,
379				"Abstract package: combining parallel markup has not been defined for this MathProcessor",
380				"dropping the extra markup from: " . join(',', map { $$_{processor} } @secondaries));
381				return $primary; }
382
383				# A helper for converting XMText
384				# ltx:XMText escapes back to general ltx markup; the only element within XMath that does.
385				# BUT it can contain nested ltx:Math!
386				# When converting to (potentially parallel markup, coarse-grained),
387				# the non-math needs to be duplicated, but with the ID's modified,
388				# AND the nested math needs to be converted to ONLY the current target's markup
389				# NOT parallel within each nested math, although it should still be cross-referencable to others!
390				# moreover, the math will need the outerWrapper.
391				my $NBSP = pack('U', 0xA0); # CONSTANT
392
393				sub convertXMTextContent {
394				my ($self, $doc, $convertspaces, @nodes) = @_;
395				my @result = ();
396				foreach my $node (@nodes) {
397				if ($node->nodeType == XML_TEXT_NODE) {
398				my $string = $node->textContent;
399				if ($convertspaces) {
400				$string =~ s/^\s+/$NBSP/; $string =~ s/\s+$/$NBSP/; }
401				push(@result, $string); }
402				else {
403				my $tag = $doc->getQName($node);
404				if ($tag eq 'ltx:XMath') {
405				my $conversion = $self->convertNode($doc, $node);
406				my $xml = $$conversion{xml};
407				# And if no xml ????
408				push(@result, $self->outerWrapper($doc, $node, $xml)); }
409				else {
410				my %attr = ();
411				foreach my $attr ($node->attributes) {
412				my $atype = $attr->nodeType;
413				if ($atype == XML_ATTRIBUTE_NODE) {
414				my $key = $attr->nodeName;
415				my $value = $attr->getValue;
416				if ($key =~ /^_/) { } # don't copy internal attributes ???
417				elsif ($key eq 'xml:id') { } # ignore; we'll handle fragid???
418				elsif ($key eq 'fragid') {
419				my $id = $doc->uniquifyID($value, $self->IDSuffix);
420				$attr{'xml:id'} = $id; }
421				else {
422				$attr{$key} = $attr->value; } } }
423				# Probably should invoke associateNode ???
424				push(@result,
425				[$tag, {%attr}, $self->convertXMTextContent($doc, $convertspaces, $node->childNodes)]); } } }
426				return @result; }
427
428				# When converting an XMath node (with an id) to some other format,
429				# we will generate an id for the new node.
430				# This method returns a suffix to be added to the XMath id.
431				# The primary format gets the id's unchanged, but secondary ones get a suffix (eg. ".pmml")
432				sub IDSuffix {
433				my ($self) = @_;
434				return ($$self{is_secondary} ? $self->rawIDSuffix : ''); }
435
436				sub rawIDSuffix {
437				return ''; }
438
439				# In order to do cross-referencing betweeen formats, and to relate semantic/presentation
440				# information to content/presentation nodes (resp), we want to associate each
441				# generated node (MathML, OpenMath,...) with a "source" XMath node "responsible" for it's generation.
442				# This is often the "current" XMath node that was being converted, but sometimes
443				# * the containing XMDual (which makes more sense when we've generated a "container")
444				# * the containing XMDual's semantic operator (makes more sense when we're generating
445				# tokens that are only visible from the presentation branch)
446				sub associateNode {
447				my ($self, $node, $currentnode, $noxref) = @_;
448				my $r = ref $node;
449				return unless $currentnode && $r && ($r eq 'ARRAY' \|\| $r eq 'XML::LibXML::Element');
450				my $document = $LaTeXML::Post::DOCUMENT;
451				# Check if already associated with a source node
452				my $isarray = ref $node eq 'ARRAY';
453				# What kind of branch are we generating?
454				my $ispresentation = $self->rawIDSuffix eq '.pmml'; # TEMPORARY HACK: BAAAAD method!
455				my $iscontainer = 0;
456				if ($isarray) {
457				return if $$node[1]{'_sourced'};
458				$$node[1]{'_sourced'} = 1;
459				my ($tag, $attr, @children) = @$node;
460				$iscontainer = grep { ref $_ } @children; }
461				else {
462				return if $node->getAttribute('_sourced');
463				$node->setAttribute('_sourced' => 1);
464				$iscontainer = scalar(element_nodes($node)); }
465				my $sourcenode = $currentnode;
466				# If the generated node is a "container" (non-token!), use the container as source
467				if ($iscontainer) {
468				if (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) {
469				$sourcenode = $container; } }
470				# If the current node is appropriately visible, use it.
471				elsif ($currentnode->getAttribute(($ispresentation ? '_cvis' : '_pvis'))) { }
472				# Else (current node isn't visible); try to find content OPERATOR
473				elsif (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) {
474				my ($op) = element_nodes($container);
475				my $q = $document->getQName($op) \|\| 'unknown';
476				if ($q eq 'ltx:XMTok') { }
477				elsif ($q eq 'ltx:XMApp') {
478				($op) = element_nodes($op);
479				if ($document->getQName($op) eq 'ltx:XMRef') {
480				$op = $document->realizeXMNode($op); } }
481				if ($op && !$op->getAttribute('_pvis')) {
482				$sourcenode = $op; }
483				else {
484				$sourcenode = $container; } }
485				# If we're intending to cross-reference, then source & generated nodes will need ID's
486				if ($$self{crossreferencing}) {
487				if (!$noxref && !$sourcenode->getAttribute('fragid')) { # If no ID, but need one
488				$document->generateNodeID($sourcenode, '', 1); } # but the ID is reusable
489				if (my $sourceid = $sourcenode->getAttribute('fragid')) { # If source has ID
490				my $nodeid = $currentnode->getAttribute('fragid') \|\| $sourceid;
491				my $id = $document->uniquifyID($nodeid, $self->IDSuffix);
492				if ($isarray) {
493				$$node[1]{'xml:id'} = $id; }
494				else {
495				$node->setAttribute('xml:id' => $id); }
496				push(@{ $$self{convertedIDs}{$sourceid} }, $id) unless $noxref; } }
497				$self->associateNodeHook($node, $sourcenode, $noxref);
498				if ($isarray) { # Array represented
499				map { $self->associateNode($_, $currentnode, $noxref) } @$node[2 .. $#$node]; }
500				else { # LibXML node
501				map { $self->associateNode($_, $currentnode, $noxref) } element_nodes($node); }
502				return; }
503
504				# Customization hook for adding other attributes to the generated math nodes.
505				sub associateNodeHook {
506				my ($self, $node, $sourcenode, $noxref) = @_;
507				return; }
508
509				sub shownode {
510				my ($node, $level) = @_;
511				$level = 0 unless defined $level;
512				my $ref = ref $node;
513				if ($ref eq 'ARRAY') {
514				my ($tag, $attr, @children) = @$node;
515				return "\n" . (' ' x $level)
516				. '[' . $tag . ',{' . join(',', map { $_ . '=>' . $$attr{$_} } sort keys %$attr) . '},'
517				. join(',', map { shownode($_, $level + 1) } @children) . ']'; }
518				elsif ($ref =~ /^XML/) {
519				return $node->toString; }
520				else {
521				return "$node"; } }
522
523				# Add backref linkages (eg. xref) onto the nodes that $self created (converted from XMath)
524				# to reference those that $otherprocessor created.
525				# NOTE: Subclass MUST define addCrossref($node,$xref_id) to add the
526				# id of the "Other Interesting Node" to the (array represented) xml $node
527				# in whatever fashion the markup for that processor uses.
528				#
529				# This may be another useful place to add a hook?
530				# It would provide the list of cross-refenced nodes in document order
531				# This would allow deciding whether or not to copy foreign attributes or other interesting things
532				sub addCrossrefs {
533				my ($self, $doc, $otherprocessor) = @_;
534				my $selfs_map = $$self{convertedIDs};
535				my $others_map = $$otherprocessor{convertedIDs};
536				my $xrefids = $$self{crossreferencing_ids};
537				foreach my $xid (keys %$selfs_map) { # For each Math id that $self converted
538				if (my $other_ids = $$others_map{$xid}) { # Did $other also convert those ids?
539				my $xref_id = $$other_ids[0];
540				if (scalar(@$other_ids) > 1) { # Find 1st in document order! (order is cached)
541				($xref_id) = sort { $$xrefids{$a} <=> $$xrefids{$b} } @$other_ids; }
542				foreach my $id (@{ $$selfs_map{$xid} }) { # look at each node $self created from $xid
543				if (my $node = $doc->findNodeByID($id)) { # If we find a node,
544				$self->addCrossref($node, $xref_id); } } } } # add a crossref from it to $others's node
545				return; }
546
547				#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
548
549				package LaTeXML::Post::Document;
550				use strict;
551				use LaTeXML::Common::XML;
552				use LaTeXML::Util::Pathname;
553				use LaTeXML::Util::Radix;
554				use DB_File;
555				use Unicode::Normalize;
556				use LaTeXML::Post; # to import error handling...
557				use LaTeXML::Common::Error;
558				use base qw(LaTeXML::Common::Object);
559				our $NSURI = "http://dlmf.nist.gov/LaTeXML";
560				our $XPATH = LaTeXML::Common::XML::XPath->new(ltx => $NSURI);
561
562				# Useful options:
563				# destination = the ultimate destination file for this document to be written.
564				# destinationDirectory = the directory it will be stored in (derived from $destination)
565				# siteDirectory = the root directory of where the entire site will be contained
566				# namespaces = a hash of namespace prefix => namespace uri
567				# namespaceURIs = reverse hash of above.
568				# nocache = a boolean, disables storing of permanent LaTeXML.cache
569				# the cache is used to remember things like image conversions from previous runs.
570				# searchpaths = array of paths to search for other resources
571				# Note that these may not be LaTeXML documents (maybe html or ....)
572				sub new {
573				my ($class, $xmldoc, %options) = @_;
574				my $self = $class->new_internal($xmldoc, %options);
575				$self->setDocument_internal($xmldoc);
576				return $self; }
577
578				sub new_internal {
579				my ($class, $xmldoc, %options) = @_;
580				my %data = ();
581				if (ref $class) { # Cloning!
582				map { $data{$_} = $$class{$_} } keys %$class;
583				$class = ref $class; }
584				map { $data{$_} = $options{$_} } keys %options; # These override.
585				if ((defined $options{destination}) && (!defined $options{destinationDirectory})) {
586				my ($dir, $name, $ext) = pathname_split($data{destination});
587				$data{destinationDirectory} = $dir \|\| '.'; }
588				# Check consistency of siteDirectory (providing there's a destinationDirectory)
589				if ($data{destinationDirectory}) {
590				if ($data{siteDirectory}) {
591				Fatal('unexpected', $data{destinationDirectory}, undef,
592				"The destination directory ($data{destinationDirectory})"
593				. " must be within the siteDirectory ($data{siteDirectory})")
594				unless pathname_is_contained($data{destinationDirectory}, $data{siteDirectory}); }
595				else {
596				$data{siteDirectory} = $data{destinationDirectory}; } }
597				# Start, at least, with our own namespaces.
598				$data{namespaces} = { ltx => $NSURI } unless $data{namespaces};
599				$data{namespaceURIs} = { $NSURI => 'ltx' } unless $data{namespaceURIs};
600				$data{idcache} = {};
601				$data{idcache_reusable} = {};
602				$data{idcache_reserve} = {};
603
604				my $self = bless {%data}, $class;
605				return $self; }
606
607				sub newFromFile {
608				my ($class, $source, %options) = @_;
609				$options{source} = $source;
610				if (!$options{sourceDirectory}) {
611				my ($dir, $name, $ext) = pathname_split($source);
612				$options{sourceDirectory} = $dir \|\| '.'; }
613				my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseFile($source), %options);
614				$doc->validate if $$doc{validate};
615				return $doc; }
616
617				sub newFromString {
618				my ($class, $string, %options) = @_;
619				$options{sourceDirectory} = '.' unless $options{sourceDirectory};
620				my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options);
621				$doc->validate if $$doc{validate};
622				return $doc; }
623
624				sub newFromSTDIN {
625				my ($class, %options) = @_;
626				my $string;
627				{ local $/ = undef; $string = <>; }
628				$options{sourceDirectory} = '.' unless $options{sourceDirectory};
629				my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options);
630				$doc->validate if $$doc{validate};
631				return $doc; }
632
633				#======================================================================
634
635				# This is for creating essentially "sub documents"
636				# that are in some sense children of $self, possibly removed or cloned from it.
637				# And they are presumably LaTeXML documents
638				sub newDocument {
639				my ($self, $root, %options) = @_;
640				my $clone_suffix = $options{clone_suffix};
641				delete $options{clone_suffix};
642				my $doc = $self->new_internal(undef, %options);
643				$doc->setDocument_internal($root, clone_suffix => $clone_suffix);
644
645				if (my $root_id = $self->getDocumentElement->getAttribute('xml:id')) {
646				$$doc{split_from_id} = $root_id; }
647
648				# Copy any processing instructions.
649				foreach my $pi ($self->findnodes(".//processing-instruction('latexml')")) {
650				$doc->getDocument->appendChild($pi->cloneNode); }
651
652				# And any resource elements
653				if (my @resources = $self->findnodes("descendant::ltx:resource")) {
654				$doc->addNodes($doc->getDocumentElement, @resources); } # cloning, as needed...
655
656				# If new document has no date, try to add one
657				$doc->addDate($self);
658
659				# And copy class from the top-level document; This is risky...
660				# We want to preserve global document style information
661				# But some may refer specifically to the document, and NOT to the parts?
662				if (my $class = $self->getDocumentElement->getAttribute('class')) {
663				my $root = $doc->getDocumentElement;
664				my $oclass = $root->getAttribute('class');
665				$root->setAttribute(class => ($oclass ? $oclass . ' ' . $class : $class)); }
666
667				# Finally, return the new document.
668				return $doc; }
669
670				sub setDocument_internal {
671				my ($self, $root, %options) = @_;
672				# Build the document's XML
673				my $roottype = ref $root;
674				if ($roottype eq 'LaTeXML::Core::Document') {
675				$root = $root->getDocument;
676				$roottype = ref $root; }
677				if (my $clone_suffix = $options{clone_suffix}) {
678				if ($roottype eq 'XML::LibXML::Document') {
679				Fatal('internal', 'unimplemented', undef,
680				"Have not yet implemented cloning for entire documents"); }
681				# Just make a clone, and then insert that.
682				$root = $self->cloneNode($root, $clone_suffix); }
683
684				if ($roottype eq 'XML::LibXML::Document') {
685				$$self{document} = $root;
686				foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's
687				$$self{idcache}{ $node->getAttribute('xml:id') } = $node; }
688				# Fetch any additional namespaces from the root
689				foreach my $ns ($root->documentElement->getNamespaces) {
690				my ($prefix, $uri) = ($ns->getLocalName, $ns->getData);
691				if ($prefix) {
692				$$self{namespaces}{$prefix} = $uri unless $$self{namespaces}{$prefix};
693				$$self{namespaceURIs}{$uri} = $prefix unless $$self{namespaceURIs}{$uri}; } }
694
695				# Extract data from latexml's ProcessingInstructions
696				# I'd like to provide structured access to the PI's for those modules that need them,
697				# but it isn't quite clear what that api should be.
698				$$self{processingInstructions} =
699				[map { $_->textContent } $XPATH->findnodes('.//processing-instruction("latexml")', $root)];
700				# Combine specified paths with any from the PI's
701				my @paths = ();
702				@paths = @{ $$self{searchpaths} } if $$self{searchpaths};
703				foreach my $pi (@{ $$self{processingInstructions} }) {
704				if ($pi =~ /^\ssearchpaths\s=\s([\"\'])(.?)\1\s*$/) {
705				push(@paths, split(',', $2)); } }
706				push(@paths, pathname_absolute($$self{sourceDirectory})) if $$self{sourceDirectory};
707				$$self{searchpaths} = [@paths]; }
708				elsif ($roottype eq 'XML::LibXML::Element') {
709				$$self{document} = XML::LibXML::Document->new("1.0", "UTF-8");
710				# Assume we've got any namespaces already ?
711				if (my $parent = $self->findnode('ancestor::*[@id][1]', $root)) {
712				$$self{parent_id} = $parent->getAttribute('xml:id'); }
713				# if no cloning requested, we can just plug the node directly in.
714				# (otherwise, we should use addNodes?)
715				# Seems that only importNode (NOT adopt) works correctly,
716				# PROVIDED we also set the namespace.
717				$$self{document}->setDocumentElement($$self{document}->importNode($root));
718				# $$self{document}->documentElement->setNamespace($root->namespaceURI, $root->prefix, 1);
719				$root->setNamespace($root->namespaceURI, $root->prefix, 1);
720				foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's
721				$$self{idcache}{ $node->getAttribute('xml:id') } = $node; } }
722				elsif ($roottype eq 'ARRAY') {
723				$$self{document} = XML::LibXML::Document->new("1.0", "UTF-8");
724				my ($tag, $attributes, @children) = @$root;
725				my ($prefix, $localname) = $tag =~ /^(.):(.)$/;
726				my $nsuri = $$self{namespaces}{$prefix};
727				my $node = $$self{document}->createElementNS($nsuri, $localname);
728				$$self{document}->setDocumentElement($node);
729				map { $$attributes{$_} && $node->setAttribute($_ => $$attributes{$_}) } keys %$attributes
730				if $attributes;
731
732				if (my $id = $$attributes{'xml:id'}) {
733				$self->recordID($id => $node); }
734				$self->addNodes($node, @children); }
735				else {
736				Fatal('unexpected', $root, undef, "Dont know how to use '$root' as document element"); }
737				return $self; }
738
739				our @MonthNames = (qw( January February March April May June
740				July August September October November December));
741
742				sub addDate {
743				my ($self, $fromdoc) = @_;
744				if (!$self->findnodes('ltx:date', $self->getDocumentElement)) {
745				my @dates;
746				# $fromdoc's document has some, so copy them.
747				if ($fromdoc && (@dates = $fromdoc->findnodes('ltx:date', $fromdoc->getDocumentElement))) {
748				$self->addNodes($self->getDocumentElement, @dates); }
749				else {
750				my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
751				$self->addNodes($self->getDocumentElement,
752				['ltx:date', { role => 'creation' },
753				$MonthNames[$mon] . " " . $mday . ", " . (1900 + $year)]); } }
754				return; }
755
756				#======================================================================
757				# Accessors
758
759				sub getDocument {
760				my ($self) = @_;
761				return $$self{document}; }
762
763				sub getDocumentElement {
764				my ($self) = @_;
765				return $$self{document}->documentElement; }
766
767				sub getSource {
768				my ($self) = @_;
769				return $$self{source}; }
770
771				sub getSourceDirectory {
772				my ($self) = @_;
773				return $$self{sourceDirectory} \|\| '.'; }
774
775				sub getSearchPaths {
776				my ($self) = @_;
777				return @{ $$self{searchpaths} }; }
778
779				sub getDestination {
780				my ($self) = @_;
781				return $$self{destination}; }
782
783				sub getDestinationDirectory {
784				my ($self) = @_;
785				return $$self{destinationDirectory}; }
786
787				sub getSiteDirectory {
788				my ($self) = @_;
789				return $$self{siteDirectory}; }
790
791				# Given an absolute pathname in the document destination directory,
792				# return the corresponding pathname relative to the site directory (they maybe different!).
793				sub siteRelativePathname {
794				my ($self, $pathname) = @_;
795				return (defined $pathname ? pathname_relative($pathname, $$self{siteDirectory}) : undef); }
796
797				sub siteRelativeDestination {
798				my ($self) = @_;
799				return (defined $$self{destination}
800				? pathname_relative($$self{destination}, $$self{siteDirectory})
801				: undef); }
802
803				sub getParentDocument {
804				my ($self) = @_;
805				return $$self{parentDocument}; }
806
807				sub getAncestorDocument {
808				my ($self) = @_;
809				my ($doc, $d) = $self;
810				while ($d = $$doc{parentDocument}) {
811				$doc = $d; }
812				return $doc; }
813
814				sub toString {
815				my ($self) = @_;
816				return $$self{document}->toString(1); }
817
818				sub getDestinationExtension {
819				my ($self) = @_;
820				return ($$self{destination} =~ /\.([^\.\/]*)$/ ? $1 : undef); }
821
822				sub checkDestination {
823				my ($self, $reldest) = @_;
824				# make absolute (if not already absolute), hopefully in destination directory.
825				my $dest = pathname_absolute($reldest, $self->getDestinationDirectory);
826				if (my $destdir = pathname_directory($dest)) {
827				pathname_mkdir($destdir)
828				or return Fatal("I/O", $destdir, undef,
829				"Could not create directory $destdir for $reldest: $!"); }
830				return $dest; }
831
832				sub stringify {
833				my ($self) = @_;
834				return 'Post::Document[' . $self->siteRelativeDestination . ']'; }
835
836				#======================================================================
837				sub validate {
838				my ($self) = @_;
839				# Check for a RelaxNGSchema PI
840				my $schema;
841				foreach my $pi (@{ $$self{processingInstructions} }) {
842				if ($pi =~ /^\sRelaxNGSchema\s=\s([\"\'])(.?)\1\s*$/) {
843				$schema = $2; } }
844				if ($schema) { # Validate using rng
845				my $rng = LaTeXML::Common::XML::RelaxNG->new($schema, searchpaths => [$self->getSearchPaths]);
846				LaTeXML::Post::Error('I/O', $schema, undef, "Failed to load RelaxNG schema $schema" . "Response was: $@")
847				unless $rng;
848				my $v = eval {
849				local $LaTeXML::IGNORE_ERRORS = 1;
850				$rng->validate($$self{document}); };
851				LaTeXML::Post::Error("malformed", 'document', undef,
852				"Document fails RelaxNG validation (" . $schema . ")",
853				"Validation reports: " . $@) if $@ \|\| !defined $v; }
854				elsif (my $decldtd = $$self{document}->internalSubset) { # Else look for DTD Declaration
855				my $dtd = XML::LibXML::Dtd->new($decldtd->publicId, $decldtd->systemId);
856				if (!$dtd) {
857				LaTeXML::Post::Error("I/O", $decldtd->publicId, undef,
858				"Failed to load DTD " . $decldtd->publicId . " at " . $decldtd->systemId,
859				"skipping validation"); }
860				else {
861				my $v = eval {
862				local $LaTeXML::IGNORE_ERRORS = 1;
863				$$self{document}->validate($dtd); };
864				LaTeXML::Post::Error("malformed", 'document', undef,
865				"Document failed DTD validation (" . $decldtd->systemId . ")",
866				"Validation reports: " . $@) if $@ \|\| !defined $v; } }
867				else { # Nothing found to validate with
868				LaTeXML::Post::Warn("expected", 'schema', undef,
869				"No Schema or DTD found for this document"); }
870				return; }
871
872				sub idcheck {
873				my ($self) = @_;
874				my %idcache = ();
875				my %dups = ();
876				my %missing = ();
877				foreach my $node ($self->findnodes("//*[\@xml:id]")) {
878				my $id = $node->getAttribute('xml:id');
879				$dups{$id} = 1 if $idcache{$id};
880				$idcache{$id} = 1; }
881				foreach my $id (keys %{ $$self{idcache} }) {
882				$missing{$id} = 1 unless $idcache{$id}; }
883				LaTeXML::Post::Warn("unexpected", 'ids', undef,
884				"IDs were duplicated in cache for " . $self->siteRelativeDestination,
885				join(',', keys %dups))
886				if keys %dups;
887				LaTeXML::Post::Warn("expected", 'ids', undef, "IDs were cached for " . $self->siteRelativeDestination
888				. " but not in document",
889				join(',', keys %missing))
890				if keys %missing;
891				return; }
892
893				#======================================================================
894				sub findnodes {
895				my ($self, $path, $node) = @_;
896				return $XPATH->findnodes($path, $node \|\| $$self{document}); }
897
898				# Similar but returns only 1st node
899				sub findnode {
900				my ($self, $path, $node) = @_;
901				my ($first) = $XPATH->findnodes($path, $node \|\| $$self{document});
902				return $first; }
903
904				sub findvalue {
905				my ($self, $path, $node) = @_;
906				return $XPATH->findvalue($path, $node \|\| $$self{document}); }
907
908				sub addNamespace {
909				my ($self, $nsuri, $prefix) = @_;
910				if (!$$self{namespaces}{$prefix} \|\| ($$self{namespaces}{$prefix} ne $nsuri)
911				\|\| (($self->getDocumentElement->lookupNamespacePrefix($nsuri) \|\| '') ne $prefix)) {
912				$$self{namespaces}{$prefix} = $nsuri;
913				$$self{namespaceURIs}{$nsuri} = $prefix;
914				$XPATH->registerNS($prefix => $nsuri);
915				$self->getDocumentElement->setNamespace($nsuri, $prefix, 0); }
916				return; }
917
918				use Carp;
919
920				sub getQName {
921				my ($self, $node) = @_;
922				if (ref $node eq 'ARRAY') {
923				return $$node[0]; }
924				elsif (ref $node eq 'XML::LibXML::Element') {
925				my $nsuri = $node->namespaceURI;
926				if (!$nsuri) { # No namespace at all???
927				if ($node->nodeType == XML_ELEMENT_NODE) {
928				return $node->localname; }
929				else {
930				return; } }
931				elsif (my $prefix = $$self{namespaceURIs}{$nsuri}) {
932				return $prefix . ":" . $node->localname; }
933				else {
934				# Hasn't got one; we'll create a prefix for internal use.
935				my $prefix = "_ns" . (1 + scalar(grep { /^_ns\d+$/ } keys %{ $$self{namespaces} }));
936				# Register it, but Don't add it to the document!!! (or xpath, for that matter)
937				$$self{namespaces}{$prefix} = $nsuri;
938				$$self{namespaceURIs}{$nsuri} = $prefix;
939				return $prefix . ":" . $node->localname; } }
940				else {
941				# confess "What's this? $node\n";
942				return; } }
943
944				#======================================================================
945				# ADD nodes to $node in the document $self.
946				# This takes a convenient recursive reprsentation for xml:
947				# data = string \| [$tagname, {attr=>value,..}, @children...]
948				# The $tagname should have a namespace prefix whose URI has been
949				# registered with addNamespace.
950
951				# Note that we're currently ignoring duplicated ids.
952				# these should only happen from rearrangement and copying of document fragments
953				# with embedded bits of math in them, which have those XMTok/XMRef pairs.
954				# If those are the cases, we should end up finding the original id'd item, anyway, right?
955				#
956				# NOTE that only XML::LibXML's addNewChild deals cleanly with namespaces
957				# and since there is only an "add" (ie. append) version (not prepend, insert after, etc)
958				# we have to orient everything towards appending.
959				# In particular, see the perversity in the following few methods.
960				sub addNodes {
961				my ($self, $node, @data) = @_;
962				foreach my $child (@data) {
963				if (ref $child eq 'ARRAY') {
964				my ($tag, $attributes, @children) = @$child;
965				if ($tag eq '_Fragment_') {
966				my $indent; # Derive indentation from indentation of $node
967				if (my $pre = $node->previousSibling) {
968				if (($pre->nodeType == XML_TEXT_NODE) && (($pre = $pre->textContent) =~ /^\s*$/)) {
969				$indent = $pre . ' '; } }
970				if ($indent) {
971				$self->addNodes($node, map { ($indent, $_) } @children); }
972				else {
973				$self->addNodes($node, @children); } }
974				else {
975				my ($prefix, $localname) = $tag =~ /^(.):(.)$/;
976				my $nsuri = $prefix && $$self{namespaces}{$prefix};
977				LaTeXML::Post::Warn('expected', 'namespace', undef, "No namespace on '$tag'") unless $nsuri;
978				my $new;
979				if (ref $node eq 'LibXML::XML::Document') {
980				$new = $node->createElementNS($nsuri, $localname);
981				$node->setDocumentElement($new); }
982				else {
983				$new = $node->addNewChild($nsuri, $localname); }
984				if ($attributes) {
985				foreach my $key (sort keys %$attributes) {
986				next unless defined $$attributes{$key};
987				next if $key =~ /^_/; # Ignore internal attributes
988				my ($attrprefix, $attrname) = $key =~ /^(.):(.)$/;
989				my $value = $$attributes{$key};
990				if ($key eq 'xml:id') {
991				if (defined $$self{idcache}{$value}) { # Duplicated ID ?!?!
992				my $newid = $self->uniquifyID($value);
993				Info('unexpected', 'duplicate_id', undef,
994				"Duplicated id=$value using $newid " . ($$self{destination} \|\| ''));
995				$value = $newid; }
996				$self->recordID($value => $new);
997				$new->setAttribute($key, $value); }
998				elsif ($attrprefix && ($attrprefix ne 'xml')) {
999				my $attrnsuri = $attrprefix && $$self{namespaces}{$attrprefix};
1000				$new->setAttributeNS($attrnsuri, $key, $$attributes{$key}); }
1001				else {
1002				$new->setAttribute($key, $$attributes{$key}); } } }
1003				$self->addNodes($new, @children); } }
1004				elsif ((ref $child) =~ /^XML::LibXML::/) {
1005				my $type = $child->nodeType;
1006				if ($type == XML_ELEMENT_NODE) {
1007				# Note: this isn't actually much slower than $node->appendChild($child) !
1008				my $nsuri = $child->namespaceURI;
1009				my $localname = $child->localname;
1010				my $new;
1011				if (ref $node eq 'LibXML::XML::Document') {
1012				$new = $node->createElementNS($nsuri, $localname);
1013				$node->setDocumentElement($new); }
1014				else {
1015				$new = $node->addNewChild($nsuri, $localname); }
1016				foreach my $attr ($child->attributes) {
1017				my $atype = $attr->nodeType;
1018				if ($atype == XML_ATTRIBUTE_NODE) {
1019				my $key = $attr->nodeName;
1020				if ($key =~ /^_/) { } # don't copy internal attributes
1021				elsif ($key eq 'xml:id') {
1022				my $value = $attr->getValue;
1023				my $old;
1024				if ((defined($old = $$self{idcache}{$value})) # if xml:id was already used
1025				&& !$old->isSameNode($child)) { # and the node was a different one
1026				my $newid = $self->uniquifyID($value);
1027				Info('unexpected', 'duplicate_id', undef,
1028				"Duplicated id=$value using $newid " . ($$self{destination} \|\| ''));
1029				$value = $newid; }
1030				$self->recordID($value => $new);
1031				$new->setAttribute($key, $value); }
1032				elsif (my $ns = $attr->namespaceURI) {
1033				$new->setAttributeNS($ns, $attr->name, $attr->getValue); }
1034				else {
1035				$new->setAttribute($attr->localname, $attr->getValue); } }
1036				}
1037				$self->addNodes($new, $child->childNodes); }
1038				elsif ($type == XML_DOCUMENT_FRAG_NODE) {
1039				$self->addNodes($node, $child->childNodes); }
1040				elsif ($type == XML_TEXT_NODE) {
1041				$node->appendTextNode($child->textContent); }
1042				}
1043				elsif (ref $child) {
1044				LaTeXML::Post::Warn('misdefined', $child, undef, "Dont know how to add $child to $node; ignoring"); }
1045				elsif (defined $child) {
1046				$node->appendTextNode($child); } }
1047				return; }
1048
1049				# Remove @nodes from the document
1050				# Allow the nodes to be array form with possibly nested XML that needs to be removed.
1051				sub removeNodes {
1052				my ($self, @nodes) = @_;
1053				foreach my $node (@nodes) {
1054				my $ref = ref $node;
1055				if (!$ref) { }
1056				elsif ($ref eq 'ARRAY') {
1057				my ($t, $a, @n) = @$node;
1058				if (my $id = $$a{'xml:id'}) {
1059				if ($$self{idcache}{$id}) {
1060				delete $$self{idcache}{$id}; } }
1061				$self->removeNodes(@n); }
1062				elsif ($ref =~ /^XML::LibXML::/) {
1063				if ($node->nodeType == XML_ELEMENT_NODE) {
1064				foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) {
1065				my $id = $idd->getAttribute('xml:id');
1066				if ($$self{idcache}{$id}) {
1067				delete $$self{idcache}{$id}; } } }
1068				$node->unlinkNode; } }
1069				return; }
1070
1071				# These nodes will be removed, but later
1072				# So mark all id's in these trees as reusable
1073				sub preremoveNodes {
1074				my ($self, @nodes) = @_;
1075				foreach my $node (@nodes) {
1076				my $ref = ref $node;
1077				if (!$ref) { }
1078				elsif ($ref eq 'ARRAY') {
1079				my ($t, $a, @n) = @$node;
1080				if (my $id = $$a{'xml:id'}) {
1081				$$self{idcache_reusable}{$id} = 1; }
1082				$self->preremoveNodes(@n); }
1083				elsif ($ref =~ /^XML::LibXML::/) {
1084				if ($node->nodeType == XML_ELEMENT_NODE) {
1085				foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) {
1086				my $id = $idd->getAttribute('xml:id');
1087				$$self{idcache_reusable}{$id} = 1; } } } }
1088				return; }
1089
1090				sub removeBlankNodes {
1091				my ($self, $node) = @_;
1092				my $n = 0;
1093				foreach my $child ($node->childNodes) {
1094				if (($child->nodeType == XML_TEXT_NODE) && ($child->textContent =~ /^\s*$/)) {
1095				$node->removeChild($child); $n++; } }
1096				return $n; }
1097
1098				# Replace $node by @replacements in the document
1099				sub replaceNode {
1100				my ($self, $node, @replacements) = @_;
1101				my ($parent, $following) = ($node->parentNode, undef);
1102				# Note that since we can only append new stuff, we've got to remove the following first.
1103				my @save = ();
1104				while (($following = $parent->lastChild) && ($$following != $$node)) { # Remove & Save following siblings.
1105				unshift(@save, $parent->removeChild($following)); }
1106				$self->removeNodes($node);
1107				$self->addNodes($parent, @replacements);
1108				map { $parent->appendChild($_) } @save; # Put these back.
1109				return; }
1110
1111				# Put @nodes at the beginning of $node.
1112				sub prependNodes {
1113				my ($self, $node, @nodes) = @_;
1114				my @save = ();
1115				# Note that since we can only append new stuff, we've got to remove the following first.
1116				while (my $last = $node->lastChild) { # Remove, but save, all children
1117				unshift(@save, $node->removeChild($last)); }
1118				$self->addNodes($node, @nodes); # Now, add the new nodes.
1119				map { $node->appendChild($_) } @save; # Put these back.
1120				return; }
1121
1122				# Clone a node, but adjusting it so that it has unique id's.
1123				# $document->cloneNode($node) or ->cloneNode($node,$idsuffix)
1124				# This clones the node and adjusts any xml:id's within it to be unique.
1125				# Any idref's to those ids will be changed to the new id values.
1126				# If $idsuffix is supplied, it can be a simple string to append to the ids;
1127				# else can be a function of the id to modify it.
1128				# Then each $id is checked to see whether it is unique; If needed,
1129				# one or more letters are appended, until a new id is found.
1130				sub cloneNode {
1131				my ($self, $node, $idsuffix, %options) = @_;
1132				return $node unless ref $node;
1133				my $copy = $node->cloneNode(1);
1134				my $nocache = $options{nocache};
1135				#### $idsuffix = '' unless defined $idsuffix;
1136				# Find all id's defined in the copy and change the id.
1137				my %idmap = ();
1138				foreach my $n ($self->findnodes('descendant-or-self::*[@xml:id]', $copy)) {
1139				my $id = $n->getAttribute('xml:id');
1140				my $newid = $self->uniquifyID($id, $idsuffix);
1141				$idmap{$id} = $newid;
1142				$self->recordID($newid => $n) unless $nocache;
1143				$n->setAttribute('xml:id' => $newid);
1144				if (my $fragid = $n->getAttribute('fragid')) { # GACK!!
1145				$n->setAttribute(fragid => substr($newid, length($id) - length($fragid))); } }
1146
1147				# Now, replace all REFERENCES to those modified ids.
1148				foreach my $n ($self->findnodes('descendant-or-self::*[@idref]', $copy)) {
1149				if (my $id = $idmap{ $n->getAttribute('idref') }) {
1150				$n->setAttribute(idref => $id); } } # use id or fragid?
1151				# Finally, we probably shouldn't have any labels attributes in here either
1152				foreach my $n ($self->findnodes('descendant-or-self::*[@labels]', $copy)) {
1153				$n->removeAttribute('labels'); }
1154				# And, if we're relocating the node across documents,
1155				# we may need to patch relative pathnames!
1156				# ????? Something to think about in the future...
1157				# if(my $base = $options{basepathname}){
1158				# foreach my $n ($self->findnodes('descendant::/@graphic or descendant::/@href', $copy)) {
1159				# $n->setvalue(relocate($n->value,$base)); }}
1160				return $copy; }
1161
1162				sub cloneNodes {
1163				my ($self, @nodes) = @_;
1164				return map { $self->cloneNode($_) } @nodes; }
1165
1166				sub addSSValues {
1167				my ($self, $node, $key, $values) = @_;
1168				$values = $values->toAttribute if ref $values;
1169				if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK!
1170				my @values = split(/\s/, $values);
1171				if (my $oldvalues = $node->getAttribute($key)) { # previous values?
1172				my @old = split(/\s/, $oldvalues);
1173				foreach my $new (@values) {
1174				push(@old, $new) unless grep { $_ eq $new } @old; }
1175				$node->setAttribute($key => join(' ', sort @old)); }
1176				else {
1177				$node->setAttribute($key => join(' ', sort @values)); } }
1178				return; }
1179
1180				sub addClass {
1181				my ($self, $node, $class) = @_;
1182				return $self->addSSValues($node, class => $class); }
1183
1184				#======================================================================
1185				# DUPLICATED from Core::Document...(see discussion there)
1186				# Decorations on one side of an XMDual should be attributed to the
1187				# parent node on the other side (see ->associateIDs)
1188
1189				sub markXMNodeVisibility {
1190				my ($self) = @_;
1191				foreach my $math ($self->findnodes('//ltx:XMath/*')) {
1192				$self->markXMNodeVisibility_aux($math, 1, 1); }
1193				return; }
1194
1195				sub markXMNodeVisibility_aux {
1196				my ($self, $node, $cvis, $pvis) = @_;
1197				return unless $node;
1198				my $qname = $self->getQName($node);
1199				return if (!$cvis \|\| $node->getAttribute('_cvis')) && (!$pvis \|\| $node->getAttribute('_pvis'));
1200				$node->setAttribute('_cvis' => 1) if $cvis;
1201				$node->setAttribute('_pvis' => 1) if $pvis;
1202				if ($qname eq 'ltx:XMDual') {
1203				my ($c, $p) = element_nodes($node);
1204				$self->markXMNodeVisibility_aux($c, 1, 0) if $cvis;
1205				$self->markXMNodeVisibility_aux($p, 0, 1) if $pvis; }
1206				elsif ($qname eq 'ltx:XMRef') {
1207				# $self->markXMNodeVisibility_aux($self->realizeXMNode($node),$cvis,$pvis); }
1208				my $id = $node->getAttribute('idref');
1209				$self->markXMNodeVisibility_aux($self->findNodeByID($id), $cvis, $pvis); }
1210				else {
1211				foreach my $child (element_nodes($node)) {
1212				$self->markXMNodeVisibility_aux($child, $cvis, $pvis); } }
1213				return; }
1214
1215				#======================================================================
1216				# Given a list of nodes (or node constructors [tag,attr,content...])
1217				# conjoin given a conjunction like ',' or a pair like [',', ' and ']
1218				sub conjoin {
1219				my ($self, $conjunction, @nodes) = @_;
1220				my ($comma, $and) = ($conjunction, $conjunction);
1221				($comma, $and) = @$conjunction if ref $conjunction;
1222				my $n = scalar(@nodes);
1223				if ($n < 2) {
1224				return @nodes; }
1225				else {
1226				my @foo = ();
1227				push(@foo, shift(@nodes));
1228				while ($nodes[1]) {
1229				push(@foo, $comma, shift(@nodes)); }
1230				push(@foo, $and, shift(@nodes));
1231				return @foo; } }
1232
1233				# Find the initial letter in a string, or *.
1234				# Uses unicode decomposition to reduce accented characters to A-Z
1235				# If $force is true, skips any non-letter initials
1236				sub initial {
1237				my ($self, $string, $force) = @_;
1238				$string = NFD($string); # Decompose accents, etc.
1239				$string =~ s/^\s+//gs;
1240				$string =~ s/^[^a-zA-Z]*// if $force;
1241				return ($string =~ /^([a-zA-Z])/ ? uc($1) : '*'); }
1242
1243				# This would typically be called to normalize the leading/trailing whitespace of nodes
1244				# that take mixed markup. WE SHOULDN'T BE DOING THIS. We need to NOT add "ignorable whitespace"
1245				# to nodes that CAN HAVE mixed content. otherwise we don't know if it is ignorable!
1246				sub trimChildNodes {
1247				my ($self, $node) = @_;
1248				if (!$node) {
1249				return (); }
1250				elsif (!ref $node) {
1251				return ($node); }
1252				elsif (my @children = $node->childNodes) {
1253				if ($children[0]->nodeType == XML_TEXT_NODE) {
1254				my $s = $children[0]->data;
1255				$s =~ s/^\s+//;
1256				if ($s) {
1257				$children[0]->setData($s); }
1258				else {
1259				shift(@children); } }
1260				if ($children[-1]->nodeType == XML_TEXT_NODE) {
1261				my $s = $children[-1]->data;
1262				$s =~ s/\s+$//;
1263				if ($s) {
1264				$children[-1]->setData($s); }
1265				else {
1266				pop(@children); } }
1267				return @children; }
1268				else {
1269				return (); } }
1270
1271				sub unisort {
1272				my ($self, @keys) = @_;
1273				# Get a (possibly cached) sorter from POST appropriate for this document's language
1274				my $lang = $self->getDocumentElement->getAttribute('xml:lang') \|\| 'en';
1275				return $LaTeXML::POST->getsorter($lang)->sort(@keys); }
1276
1277				#======================================================================
1278
1279				sub addNavigation {
1280				my ($self, $relation, $id) = @_;
1281				return if $self->findnode('//ltx:navigation/ltx:ref[@rel="' . $relation . '"][@idref="' . $id . '"]');
1282				my $ref = ['ltx:ref', { idref => $id, rel => $relation, show => 'toctitle' }];
1283				if (my $nav = $self->findnode('//ltx:navigation')) {
1284				$self->addNodes($nav, $ref); }
1285				else {
1286				$self->addNodes($self->getDocumentElement, ['ltx:navigation', {}, $ref]); }
1287				return; }
1288
1289				#======================================================================
1290				# Support for ID's
1291
1292				sub recordID {
1293				my ($self, $id, $node) = @_;
1294				# make an issue if already there?
1295				$$self{idcache}{$id} = $node;
1296				delete $$self{idcache_reserve}{$id}; # And no longer reserved
1297				delete $$self{idcache_reusable}{$id}; # or reusable
1298				return; }
1299
1300				sub findNodeByID {
1301				my ($self, $id) = @_;
1302				my $node = $$self{idcache}{$id};
1303				return $$self{idcache}{$id}; }
1304
1305				sub realizeXMNode {
1306				my ($self, $node) = @_;
1307				if ($self->getQName($node) eq 'ltx:XMRef') {
1308				my $id = $node->getAttribute('idref');
1309				if (my $realnode = $self->findNodeByID($id)) {
1310				#print STDERR "REALIZE $id => $realnode\n";
1311				return $realnode; }
1312				else {
1313				Fatal('expected', 'id', undef, "Cannot find a node with xml:id='$id'");
1314				return; } }
1315				else {
1316				return $node; } }
1317
1318				sub uniquifyID {
1319				my ($self, $baseid, $suffix) = @_;
1320				my $id = $baseid;
1321				$id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix;
1322				my $cachekey = $id;
1323				while (($$self{idcache}{$id} \|\| $$self{idcache_reserve}{$id}) && !$$self{idcache_reusable}{$id}) {
1324				$id = $baseid . radix_alpha(++$$self{idcache_clashes}{$cachekey});
1325				$id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix; }
1326				delete $$self{idcache_reusable}{$id}; # $id is no longer reusable
1327				$$self{idcache_reserve}{$id} = 1; # and we'll consider it reserved until recorded.
1328				return $id; }
1329
1330				# Generate, add and register an xml:id for $node.
1331				# Unless it already has an id, the created id will
1332				# be "structured" relative to it's parent using $prefix
1333				sub generateNodeID {
1334				my ($self, $node, $prefix, $reusable) = @_;
1335				my $id = $node->getAttribute('xml:id');
1336				return $id if $id;
1337				# Find the closest parent with an ID
1338				my ($parent, $pid, $n) = ($node->parentNode, undef, undef);
1339				while ($parent && !($pid = $parent->getAttribute('xml:id'))) {
1340				$parent = $parent->parentNode; }
1341				# Now find the next unused id relative to the parent id, as "prefix"
1342				$pid .= '.' if $pid;
1343				for ($n = 1 ; ($id = $pid . $prefix . $n)
1344				&& ($$self{idcache}{$id} \|\| $$self{idcache_reserved}{$id}) ; $n++) { }
1345				$node->setAttribute('xml:id' => $id);
1346				$$self{idcache}{$id} = $node;
1347				$$self{idcache_reusable}{$id} = $reusable;
1348				# If we've already been scanned, and have fragid's, create one here, too.
1349				if (my $fragid = $parent && $parent->getAttribute('fragid')) {
1350				$node->setAttribute(fragid => $fragid . '.' . $prefix . $n); }
1351				return $id; }
1352
1353				#======================================================================
1354				# adjust_latexml_doctype($doc,"Foo","Bar") =>
1355				#
1356				# "http://dlmf.nist.gov/LaTeXML/LaTeXML-Foo-Bar.dtd">
1357				sub adjust_latexml_doctype {
1358				my ($self, @additions) = @_;
1359				my $doc = $$self{document};
1360				if (my $dtd = $doc->internalSubset) {
1361				if ($dtd->toString
1362				=~ /^$/) {
1363				my ($root, $parts, $system) = ($1, $3, $5);
1364				my ($type, @addns) = split(/ \+ /, $parts);
1365				my %addns = ();
1366				map { $addns{$_} = 1 } @addns, @additions;
1367				@addns = sort keys %addns;
1368				my $publicid = join(' + ', "-//NIST LaTeXML//LaTeXML $type", @addns);
1369				my $systemid = join('-', "http://dlmf.nist.gov/LaTeXML/LaTeXML", @addns) . ".dtd";
1370				$doc->removeInternalSubset; # Apparently we've got to remove it first.
1371				$doc->createInternalSubset($root, $publicid, $systemid); } }
1372				return; }
1373
1374				#======================================================================
1375				# Cache support: storage of data from previous run.
1376				# ?
1377
1378				# cacheFile as parameter ????
1379
1380				sub cacheLookup {
1381				my ($self, $key) = @_;
1382				$self->openCache;
1383				return $$self{cache}{$key}; }
1384
1385				sub cacheStore {
1386				my ($self, $key, $value) = @_;
1387				$self->openCache;
1388				if (defined $value) {
1389				$$self{cache}{$key} = $value; }
1390				else {
1391				delete $$self{cache}{$key}; }
1392				return; }
1393
1394				sub openCache {
1395				my ($self) = @_;
1396				if (!$$self{cache}) {
1397				$$self{cache} = {};
1398				my $dbfile = $self->checkDestination("LaTeXML.cache");
1399				tie %{ $$self{cache} }, 'DB_File', $dbfile, O_RDWR \| O_CREAT
1400				or return Fatal('internal', 'db', undef,
1401				"Couldn't create DB cache for " . $self->getDestination,
1402				"Message was: " . $!,
1403				(-f $dbfile ? "\n(possibly incompatible db format?)" : ''));
1404				}
1405				return; }
1406
1407				sub closeCache {
1408				my ($self) = @_;
1409				if ($$self{cache}) {
1410				untie %{ $$self{cache} };
1411				$$self{cache} = undef; }
1412				return; }
1413
1414				1;
1415				#======================================================================
1416
1417				__END__