File Coverage

blib/lib/LaTeX/TOM/Parser.pm

Criterion	Covered	Total	%
statement	494	647	76.3
branch	148	226	65.4
condition	133	199	66.8
subroutine	40	49	81.6
pod	0	3	0.0
total	815	1124	72.5

line	stmt	bran	cond	sub	pod	time	code
1							###############################################################################
2							#
3							# LaTeX::TOM::Parser
4							#
5							# The parsing class
6							#
7							###############################################################################
8
9							package LaTeX::TOM::Parser;
10
11	10			10		111	use strict;
	10					27
	10					322
12	10					4479	use base qw(
13							LaTeX::TOM::Node
14							LaTeX::TOM::Tree
15	10			10		63	);
	10					18
16	10			10		84	use constant true => 1;
	10					28
	10					607
17	10			10		64	use constant false => 0;
	10					22
	10					551
18
19	10			10		66	use Carp qw(carp croak);
	10					32
	10					494
20	10			10		64	use File::Basename qw(fileparse);
	10					16
	10					1935
21
22							our $VERSION = '0.13';
23
24							my %error_handlers = (
25							0 => sub { warn "parse error: $_[0].\n" },
26							1 => sub { die "parse error: $_[0].\n" },
27							2 => sub {},
28							);
29
30							# Constructor
31							#
32							sub _new {
33	17			17		48	my $class = shift;
34
35	10			10		74	no strict 'refs';
	10					27
	10					80752
36
37							my $self = bless {
38							config => {
39	17					63	BRACELESS => \%{'LaTeX::TOM::BRACELESS'},
40	17					46	INNERCMDS => \%{'LaTeX::TOM::INNERCMDS'},
41	17					45	MATHENVS => \%{'LaTeX::TOM::MATHENVS'},
42	17					43	MATHBRACKETS => \%{'LaTeX::TOM::MATHBRACKETS'},
43	17					43	PARSE_ERRORS_FATAL => ${'LaTeX::TOM::PARSE_ERRORS_FATAL'},
44	17					41	TEXTENVS => \%{'LaTeX::TOM::TEXTENVS'},
	17					101
45							},
46							};
47
48	17					73	$self->_init(@_);
49
50	17					63	return $self;
51							}
52
53							sub new {
54							# XXX deprecated as of 2023-01-30
55	0			0	0	0	carp 'Direct use of LaTeX::TOM::Parser constructor is deprecated and will be removed in future version';
56	0					0	shift->_new(@_);
57							}
58
59							# Set/reset "globals"
60							#
61							sub _init {
62	17			17		33	my $parser = shift;
63	17					44	my ($parse_errors_fatal, $readinputs, $applymappings) = @_;
64
65							my $retrieve_opt_default = sub
66							{
67	51			51		157	my ($opt, $default) = @_;
68	51	100				161	return $opt if defined $opt;
69	23					87	return $default;
70	17					95	};
71
72							# set user options
73							#
74	17					48	$parser->{readinputs} = $retrieve_opt_default->($readinputs, 0);
75	17					75	$parser->{applymappings} = $retrieve_opt_default->($applymappings, 0);
76	17					51	$parser->{PARSE_ERRORS_FATAL} = $retrieve_opt_default->($parse_errors_fatal, $parser->{config}{PARSE_ERRORS_FATAL});
77
78							# init internal stuff
79							#
80	17					41	$parser->{MATHBRACKETS} = $parser->{config}{MATHBRACKETS};
81
82							# this will hold a running list/hash of commands that have been remapped
83	17					34	$parser->{MAPPEDCMDS} = {};
84
85							# this will hold a running list/hash of commands that have been used. We dont
86							# bother apply mappings except to commands that have been used.
87	17					36	$parser->{USED_COMMANDS} = {};
88
89							# no file yet
90	17					100	$parser->{file} = undef;
91							}
92
93							# Parse a LaTeX file, return a tree. You probably want this method.
94							#
95							sub parseFile {
96	9			9	0	526	my $parser = shift;
97	9					19	my $filename = shift;
98
99							# init variables
100							#
101	9					19	$parser->{file} = $filename; # file name member data
102	9					19	my $tree = {}; # init output tree
103
104							# read in text from file or bomb out
105							#
106	9					30	my $text = _readFile($filename, true);
107
108							# do the parse
109							#
110	9					53	$tree = $parser->parse($text);
111
112	9					37	return $tree;
113							}
114
115							# main parsing entrypoint
116							#
117							sub parse {
118	19			19	0	4517	my $parser = shift;
119	19					50	my ($text) = @_;
120
121							# first half of parsing (goes up to finding commands, reading inputs)
122							#
123	19					62	my ($tree, $bracehash) = $parser->_parseA($text);
124							_debug(
125							'done with _parseA',
126	0			0		0	sub { $tree->_warn() },
127	19					121	);
128
129							# handle mappings
130							#
131	19	100				96	$parser->_applyMappings($tree) if $parser->{applymappings};
132							_debug(
133							'done with _applyMappings',
134	0			0		0	sub { $tree->_warn() },
135	19					94	);
136
137							# second half of parsing (environments)
138							#
139	19					75	$parser->_parseB($tree);
140							_debug(
141							'done with _parseB',
142	0			0		0	sub { $tree->_warn() },
143	19					153	);
144
145							# once all the above is done we can propegate math/plaintext modes down
146							#
147	19					118	$parser->_propegateModes($tree, 0, 0); # math = 0, plaintext = 0
148							_debug(
149							'done with _propegateModes',
150	0			0		0	sub { $tree->_warn() },
151	19					134	);
152
153							# handle kooky \[ \] math mode
154							#
155	19	50				100	if (not exists $parser->{MAPPEDCMDS}->{'\\['}) {
156							# math mode (\[ \], )
157	19					119	$parser->_stage5($tree, {'\\[' => '\\]', '\$' => '\$'}, 1);
158	19					88	$parser->_propegateModes($tree, 0, 0); # have to do this again of course
159	19					57	$parser->{MATHBRACKETS}->{'\\['} = '\\]'; # put back in brackets list for
160	19					57	$parser->{MATHBRACKETS}->{'\$'} = '\$'; # printing purposes.
161							}
162							_debug(
163							undef,
164	0			0		0	sub { $tree->_warn() },
165	19					113	);
166
167	19					109	$tree->listify; # add linked-list stuff
168
169	19					113	return $tree;
170							}
171
172							# Parsing with no mappings and no externally accessible parser object.
173							#
174							sub _basicparse {
175	8			8		17	my $parser = shift; # @_ would break code
176	8					13	my $text = shift;
177
178	8	50				27	my $parse_errors_fatal = (defined $_[0] ? $_[0] : $parser->{config}{PARSE_ERRORS_FATAL});
179	8	100				22	my $readinputs = (defined $_[1] ? $_[1] : 1);
180
181	8					42	$parser = LaTeX::TOM::Parser->_new($parse_errors_fatal, $readinputs);
182	8					27	my ($tree, $bracehash) = $parser->_parseA($text);
183
184	8					28	$parser->_parseB($tree);
185
186	8					31	$tree->listify; # add linked-list stuff
187
188	8					74	return ($tree, $bracehash);
189							}
190
191							# start the tree. separate out comment and text nodes.
192							#
193							sub _stage1 {
194	27			27		47	my $parser = shift;
195	27					45	my $text = shift;
196
197	27					81	my @nodes = _getTextAndCommentNodes($text, 0, length($text));
198
199	27					131	return LaTeX::TOM::Tree->_new([@nodes]);
200							}
201
202							# this stage parses the braces ({}) and adds the corresponding structure to
203							# the tree.
204							#
205							sub _stage2 {
206	146			146		251	my $parser = shift;
207
208	146					181	my $tree = shift;
209	146		100			337	my $bracehash = shift \|\| undef;
210	146		50			396	my $startidx = shift \|\| 0; # last two params for starting at some specific
211	146		50			369	my $startpos = shift \|\| 0; # node and offset.
212
213	146					206	my %blankhash;
214
215	146	100				275	if (not defined $bracehash) {
216	27					71	$bracehash = {%blankhash};
217							}
218
219	146					194	my $leftidx = -1;
220	146					210	my $leftpos = -1;
221	146					190	my $leftcount = 0;
222
223							# loop through the nodes
224	146					238	for (my $i = $startidx; $i < @{$tree->{nodes}}; $i++) {
	410					890
225	264					384	my $node = $tree->{nodes}[$i];
226	264					377	my $spos = $node->{start}; # get text start position
227
228							# set position placeholder within the text block
229	264	100				480	my $pos = ($i == $startidx) ? $startpos : 0;
230
231	264	50				630	if ($node->{type} eq 'TEXT') {
232
233	264					884	_debug("parseStage2: looking at text node: [$node->{content}]", undef);
234
235	264					571	my ($nextpos, $brace) = _findbrace($node->{content}, $pos);
236	264					629	while ($nextpos != -1) {
237
238	244					330	$pos = $nextpos + 1; # update position pointer
239
240							# handle left brace
241	244	100				532	if ($brace eq '{') {
		50
242	122					370	_debug("found '{' at position $nextpos, leftcount is $leftcount", undef);
243	122	100				250	if ($leftcount == 0) {
244	119					189	$leftpos = $nextpos;
245	119					152	$leftidx = $i
246							}
247	122					187	$leftcount++;
248							}
249
250							# handle right brance
251							elsif ($brace eq '}') {
252
253	122					369	_debug("found '}' at position $nextpos, leftcount is $leftcount", undef);
254	122					198	my $rightpos = $nextpos;
255	122					172	$leftcount--;
256
257							# found the corresponding right brace to our starting left brace
258	122	100				232	if ($leftcount == 0) {
259
260							# see if we have to split the text node into 3 parts
261							#
262	119	50				228	if ($leftidx == $i) {
263
264	119					308	my ($leftside, $textnode3) = $node->split($rightpos, $rightpos);
265	119					301	my ($textnode1, $textnode2) = $leftside->split($leftpos, $leftpos);
266
267							# make the new GROUP node
268							my $groupnode = LaTeX::TOM::Node->_new(
269							{type => 'GROUP',
270							start => $textnode2->{start} - 1,
271	119					435	end => $textnode2->{end} + 1,
272							children => LaTeX::TOM::Tree->_new([$textnode2]),
273							});
274
275							# splice the new subtree into the old location
276	119					198	splice @{$tree->{nodes}}, $i, 1, $textnode1, $groupnode, $textnode3;
	119					382
277
278							# add to the brace-pair lookup table
279	119					383	$bracehash->{$groupnode->{start}} = $groupnode->{end};
280	119					357	$bracehash->{$groupnode->{end}} = $groupnode->{start};
281
282							# recur into new child node
283	119					411	$parser->_stage2($groupnode->{children}, $bracehash);
284
285	119					352	$i++; # skip to textnode3 for further processing
286							}
287
288							# split across nodes
289							#
290							else {
291
292	0					0	my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos);
293	0					0	my ($textnode3, $textnode4) = $node->split($rightpos, $rightpos);
294
295							# remove nodes in between the node we found '{' in and the node
296							# we found '}' in
297							#
298	0					0	my @removed = splice @{$tree->{nodes}}, $leftidx+1, $i-$leftidx-1;
	0					0
299
300							# create a group node that contains the text after the left brace,
301							# then all the nodes up until the next text node, then the text
302							# before the right brace.
303							#
304							my $groupnode = LaTeX::TOM::Node->_new(
305							{type => 'GROUP',
306							start => $textnode2->{start} - 1,
307	0					0	end => $textnode3->{end} + 1,
308							children => LaTeX::TOM::Tree->_new(
309							[$textnode2,
310							@removed,
311							$textnode3]),
312							});
313
314							# replace the two original text nodes with the leftover left and
315							# right portions, as well as the group node with everything in
316							# the middle.
317							#
318	0					0	splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $groupnode, $textnode4;
	0					0
319
320							# add to the brace-pair lookup table
321	0					0	$bracehash->{$groupnode->{start}} = $groupnode->{end};
322	0					0	$bracehash->{$groupnode->{end}} = $groupnode->{start};
323
324							# recur into new child nodes
325	0					0	$parser->_stage2($groupnode->{children}, $bracehash);
326
327							# step back to textnode4 on this level for further processing
328	0					0	$i -= scalar @removed;
329							}
330
331	119					219	$leftpos = -1; # reset left data
332	119					151	$leftidx = -1;
333	119					312	last;
334							} # $leftcount == 0
335
336							# check for '}'-based error
337							#
338	3	50				9	if ($leftcount < 0) {
339	0					0	$error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("'}' before '{' at " . ($spos + $rightpos));
340	0					0	$leftcount = 0; # reset and continue
341							}
342							} # right brace
343
344	125					251	($nextpos, $brace) = _findbrace($node->{content}, $pos);
345
346							} # while (braces left)
347
348							} # if TEXT
349
350							} # loop over all nodes
351
352							# check for extra '{' parse error
353							#
354	146	50				374	if ($leftcount > 0) {
355	0					0	my $spos = $tree->{nodes}[$leftidx]->{start}; # get text start position
356	0					0	$error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '{' at " . ($spos + $leftpos));
357
358							# try to continue on, after the offending brace
359	0					0	$parser->_stage2($tree, $bracehash, $leftidx, $leftpos + 1);
360							}
361
362	146					293	return $bracehash;
363							}
364
365							# this stage finds LaTeX commands and accordingly turns GROUP nodes into
366							# command nodes, labeled with the command
367							#
368							sub _stage3 {
369	146			146		221	my $parser = shift;
370
371	146					202	my $tree = shift;
372	146					178	my $parent = shift;
373
374	146					231	for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
	531					1131
375
376	385					515	my $node = $tree->{nodes}[$i];
377
378							# check text node for command tag
379	385	100				690	if ($node->{type} eq 'TEXT') {
380	266					383	my $text = $node->{content};
381
382							# inner command (such as {\command text text}). our regexp checks to see
383							# if this text chunk begins with \command, since that would be the case
384							# due to the previous parsing stages. if found, the parent node is
385							# promoted to a command.
386							#
387	266	100	100			962	if ($text =~ /^\s\\(\w+\?)/ && defined $parent && $parser->{config}{INNERCMDS}->{$1}) {
			100
388	2					5	my $command = $1;
389
390							# if the parent is already a command node, we have to make a new
391							# nested command node
392							#
393	2	50				7	if ($parent->{type} eq 'COMMAND') {
		50
394
395							# make a new command node
396							my $newnode = LaTeX::TOM::Node->_new(
397							{type => 'COMMAND',
398							command => $command,
399							start => $parent->{start},
400							end => $parent->{end},
401							position => 'inner',
402	0					0	children => $parent->{children} });
403
404							# point parent to it
405	0					0	$parent->{children} = LaTeX::TOM::Tree->_new([$newnode]);
406
407							# start over at this level (get additional inner commands)
408	0					0	$parent = $newnode;
409	0					0	$i = -1;
410
411	0					0	$parser->{USED_COMMANDS}->{$newnode->{command}} = 1;
412							}
413
414							# parent is a naked group, we can make it into a command node
415							#
416							elsif ($parent->{type} eq 'GROUP') {
417	2					4	$parent->{type} = 'COMMAND';
418	2					4	$parent->{command} = $command;
419	2					5	$parent->{position} = 'inner';
420
421							# start over at this level
422	2					2	$i = -1;
423
424	2					5	$parser->{USED_COMMANDS}->{$parent->{command}} = 1;
425							}
426
427	2					8	$node->{content} =~ s/^\s\\(?:\w+\?)//o;
428							}
429
430							# outer command (such as \command{parameters}). our regexp checks to
431							# see if this text chunk ends in \command, since that would be the case
432							# due to the previous parsing stages.
433							#
434	266	100	100			1252	if ($text =~ /(?:^\|[^\\])(\\\w+\?(\s\[.?\])?)\s$/os &&
			66
435							defined $tree->{nodes}[$i+1] &&
436							$tree->{nodes}[$i+1]->{type} eq 'GROUP') {
437
438	112					302	my $tag = $1;
439
440	112					381	_debug("found text node [$text] with command tag [$tag]", undef);
441
442							# remove the text
443	112					535	$node->{content} =~ s/\\\w+\?\s(?:\[.?\])?\s$//os;
444
445							# parse it for command and ops
446	112					343	$tag =~ /^\\(\w+\?)\s(?:\[(.*?)\])?$/os;
447
448	112					225	my $command = $1;
449	112					199	my $opts = $2;
450
451							# make the next node a command node with the above data
452	112					238	my $next = $tree->{nodes}[$i+1];
453
454	112					177	$next->{type} = 'COMMAND';
455	112					222	$next->{command} = $command;
456	112					180	$next->{opts} = $opts;
457	112					186	$next->{position} = 'outer';
458
459	112					291	$parser->{USED_COMMANDS}->{$next->{command}} = 1;
460							}
461
462							# recognize braceless commands
463							#
464	266	100	66			1283	if ($text =~ /(\\(\w+\*?)[ \t]+(\S+))/gso \|\| $text =~ /(\\(\w+)(\d+))/gso) {
465	2					4	my $all = $1;
466	2					13	my $command = $2;
467	2					4	my $param = $3;
468
469	2	50				8	if ($parser->{config}{BRACELESS}->{$command}) {
470							# warn "found braceless command $command with param $param";
471
472							# get location to split from node text
473	0					0	my $a = index $node->{content}, $all, 0;
474	0					0	my $b = $a + length($all) - 1;
475
476							# make all the new nodes
477
478							# new left and right text nodes
479	0					0	my ($leftnode, $rightnode) = $node->split($a, $b);
480
481							# param contents node
482	0					0	my $pstart = index $node->{content}, $param, $a;
483							my $newchild = LaTeX::TOM::Node->_new(
484							{type => 'TEXT',
485							start => $node->{start} + $pstart,
486	0					0	end => $node->{start} + $pstart + length($param) - 1,
487							content => $param });
488
489							# new command node
490							my $commandnode = LaTeX::TOM::Node->_new(
491							{type => 'COMMAND',
492							braces => 0,
493							command => $command,
494							start => $node->{start} + $a,
495	0					0	end => $node->{start} + $b,
496							children => LaTeX::TOM::Tree->_new([$newchild]),
497							});
498
499	0					0	$parser->{USED_COMMANDS}->{$commandnode->{command}} = 1;
500
501							# splice these all into the original array
502	0					0	splice @{$tree->{nodes}}, $i, 1, $leftnode, $commandnode, $rightnode;
	0					0
503
504							# make the rightnode the node we're currently analyzing
505	0					0	$node = $rightnode;
506
507							# make sure outer loop will continue parsing after rightnode
508	0					0	$i += 2;
509							}
510							}
511							}
512
513							# recur
514	385	100	100			1272	if ($node->{type} eq 'GROUP' \|\|
515							$node->{type} eq 'COMMAND') {
516
517	119					312	$parser->_stage3($node->{children}, $node);
518							}
519							}
520							}
521
522							# this stage finds \begin{x} \end{x} environments and shoves their contents
523							# down into a new child node, with a parent node of ENVIRONMENT type.
524							#
525							# this has the effect of making the tree deeper, since much of the structure
526							# is in environment tags and will now be picked up.
527							#
528							# for ENVIRONMENTs, "start" means the ending } on the \begin tag,
529							# "end" means the starting \ on the \end tag,
530							# "ostart" is the starting \ on the "begin" tag,
531							# "oend" is the ending } on the "end" tag, and
532							# and "class" is the "x" from above.
533							#
534							sub _stage4 {
535	144			144		223	my $parser = shift;
536	144					183	my $tree = shift;
537
538	144					197	my $bcount = 0; # \begin "stack count"
539	144					202	my $class = ""; # environment class
540	144					174	my $bidx = 0; # \begin array index.
541
542	144					219	for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
	604					1104
543	460					623	my $node = $tree->{nodes}->[$i];
544
545							# see if this is a "\begin" command node
546	460	100	100			1945	if ($node->{type} eq 'COMMAND' && $node->{command} eq 'begin') {
		100	100
		100	100
547
548	32					158	_debug("parseStage4: found a begin COMMAND node, $node->{children}->{nodes}[0]->{content} @ $node->{start}", undef);
549
550							# start a new "stack"
551	32	100				99	if ($bcount == 0) {
		50
552	28					59	$bidx = $i;
553	28					44	$bcount++;
554	28					74	$class = $node->{children}->{nodes}->[0]->{content};
555	28					81	_debug("parseStage4: opening environment tag found, class = $class", undef);
556							}
557
558							# add to the "stack"
559							elsif ($node->{children}->{nodes}->[0]->{content} eq $class) {
560	0					0	$bcount++;
561	0					0	_debug("parseStage4: incrementing tag count for $class", undef);
562							}
563							}
564
565							# handle "\end" command nodes
566							elsif ($node->{type} eq 'COMMAND' &&
567							$node->{command} eq 'end' &&
568							$node->{children}->{nodes}->[0]->{content} eq $class) {
569
570	27					57	$bcount--;
571	27					97	_debug("parseStage4: decrementing tag count for $class", undef);
572
573							# we found our closing "\end" tag. replace everything with the proper
574							# ENVIRONMENT tag and subtree.
575							#
576	27	50				88	if ($bcount == 0) {
577
578	27					88	_debug("parseStage4: closing environment $class", undef);
579
580							# first we must take everything between the "\begin" and "\end"
581							# nodes and put them in a new array, removing them from the old one
582	27					70	my @newarray = splice @{$tree->{nodes}}, $bidx+1, $i - ($bidx + 1);
	27					100
583
584							# make the ENVIRONMENT node
585	27					73	my $start = $tree->{nodes}[$bidx]->{end};
586	27					56	my $end = $node->{start};
587	27					168	my $envnode = LaTeX::TOM::Node->_new(
588							{type => 'ENVIRONMENT',
589							class => $class,
590							start => $start, # "inner" start and end
591							end => $end,
592							ostart => $start - length('begin') - length($class) - 2,
593							oend => $end + length('end') + length($class) + 2,
594							children => LaTeX::TOM::Tree->_new([@newarray]),
595							});
596
597	27	50				119	if ($parser->{config}{MATHENVS}->{$envnode->{class}}) {
598	0					0	$envnode->{math} = 1;
599							}
600
601							# replace the \begin and \end COMMAND nodes with the single
602							# environment node
603	27					45	splice @{$tree->{nodes}}, $bidx, 2, $envnode;
	27					160
604
605	27					57	$class = ""; # reset class.
606
607							# i is going to change by however many nodes we removed
608	27					49	$i -= scalar @newarray;
609
610							# recur into the children
611	27					75	$parser->_stage4($envnode->{children});
612							}
613							}
614
615							# recur in general
616							elsif ($node->{children}) {
617	90					361	$parser->_stage4($node->{children});
618							}
619							}
620
621							# parse error if we're missing an "\end" tag.
622	144	100				396	if ($bcount > 0) {
623	1					9	$error_handlers{$parser->{PARSE_ERRORS_FATAL}}->(
624							"missing \\end{$class} for \\begin{$class} at position $tree->{nodes}[$bidx]->{end}"
625							);
626							}
627							}
628
629							# This is the "math" stage: here we grab simple-delimiter math modes from
630							# the text they are embedded in, and turn those into new groupings, with the
631							# "math" flag set.
632							#
633							# having this top level to go over all the bracket types prevents some pretty
634							# bad combinatorial explosion
635							#
636							sub _stage5 {
637	46			46		68	my $parser = shift;
638
639	46					72	my $tree = shift;
640	46		100			139	my $caremath = shift \|\| 0;
641
642	46					87	my $brackets = $parser->{MATHBRACKETS};
643
644							# loop through all the different math mode bracket types
645	46					231	foreach my $left (sort {length($b) <=> length($a)} keys %$brackets) {
	142					304
646	140					233	my $right = $brackets->{$left};
647
648	140					306	$parser->_stage5_r($tree, $left, $right, $caremath);
649							}
650							}
651
652							# recursive meat of above
653							#
654							sub _stage5_r {
655	554			554		759	my $parser = shift;
656
657	554					669	my $tree = shift;
658	554					722	my $left = shift;
659	554					681	my $right = shift;
660	554		100			1182	my $caremath = shift \|\| 0; # do we care if we're already in math mode?
661							# this matters for , \[ \]
662
663	554					691	my $leftpos = -1; # no text pos for found left brace yet.
664	554					659	my $leftidx = -1; # no array index for found left brace yet.
665
666							# loop through the nodes
667	554					728	for (my $i = 0; $i < scalar @{$tree->{nodes}}; $i++) {
	2120					3954
668	1566					2110	my $node = $tree->{nodes}[$i];
669	1566					1907	my $pos = 0; # position placeholder within the text block
670	1566					2118	my $spos = $node->{start}; # get text start position
671
672	1566	100	66			7157	if ($node->{type} eq 'TEXT' &&
		100	66
			66
			66
673							(!$caremath \|\| (!$node->{math} && $caremath))) {
674
675							# search for left brace if we haven't started a pair yet
676	1114	50				1860	if ($leftidx == -1) {
677	1114					1771	$leftpos = _findsymbol($node->{content}, $left, $pos);
678
679	1114	50				2012	if ($leftpos != -1) {
680	0					0	_debug("found (left) $left in [$node->{content}]", undef);
681	0					0	$leftidx = $i;
682	0					0	$pos = $leftpos + 1; # next pos to search from
683							}
684							}
685
686							# search for a right brace
687	1114	50				1671	if ($leftpos != -1) {
688	0					0	my $rightpos = _findsymbol($node->{content}, $right, $pos);
689
690							# found
691	0	0				0	if ($rightpos != -1) {
692
693							# we have to split the text node into 3 parts
694	0	0				0	if ($leftidx == $i) {
695	0					0	_debug("splitwithin: found (right) $right in [$node->{content}]", undef);
696
697	0					0	my ($leftnode, $textnode3) = $node->split($rightpos, $rightpos + length($right) - 1);
698	0					0	my ($textnode1, $textnode2) = $leftnode->split($leftpos, $leftpos + length($left) - 1);
699
700	0					0	my $startpos = $spos; # get text start position
701
702							# make the math ENVIRONMENT node
703	0					0	my $mathnode = LaTeX::TOM::Node->_new(
704							{type => 'ENVIRONMENT',
705							class => $left, # use left delim as class
706							math => 1,
707							start => $startpos + $leftpos,
708							ostart => $startpos + $leftpos - length($left) + 1,
709							end => $startpos + $rightpos,
710							oend => $startpos + $rightpos + length($right) - 1,
711							children => LaTeX::TOM::Tree->_new([$textnode2]),
712							});
713
714	0					0	splice @{$tree->{nodes}}, $i, 1, $textnode1, $mathnode, $textnode3;
	0					0
715
716	0					0	$i++; # skip ahead two nodes, so we'll be parsing textnode3
717							}
718
719							# split across nodes
720							else {
721
722	0					0	_debug("splitacross: found (right) $right in [$node->{content}]", undef);
723
724							# create new set of 4 smaller text nodes from the original two
725							# that contain the left and right delimiters
726							#
727	0					0	my ($textnode1, $textnode2) = $tree->{nodes}[$leftidx]->split($leftpos, $leftpos + length($left) - 1);
728	0					0	my ($textnode3, $textnode4) = $tree->{nodes}[$i]->split($rightpos, $rightpos + length($right) - 1);
729
730							# nodes to remove "from the middle" (between the left and right
731							# text nodes which contain the delimiters)
732							#
733	0					0	my @remnodes = splice @{$tree->{nodes}}, $leftidx+1, $i - $leftidx - 1;
	0					0
734
735							# create a math node that contains the text after the left brace,
736							# then all the nodes up until the next text node, then the text
737							# before the right brace.
738							#
739							my $mathnode = LaTeX::TOM::Node->_new(
740							{type => 'ENVIRONMENT',
741							class => $left,
742							math => 1,
743							start => $textnode2->{start} - 1,
744							end => $textnode3->{end} + 1,
745							ostart => $textnode2->{start} - 1 - length($left) + 1,
746	0					0	oend => $textnode3->{end} + 1 + length($right) - 1,
747							children => LaTeX::TOM::Tree->_new(
748							[$textnode2,
749							@remnodes,
750							$textnode3]),
751							});
752
753							# replace (TEXT_A, ... , TEXT_B) with the mathnode created above
754	0					0	splice @{$tree->{nodes}}, $leftidx, 2, $textnode1, $mathnode, $textnode4;
	0					0
755
756							# do all nodes again but the very leftmost
757							#
758	0					0	$i = $leftidx;
759							}
760
761	0					0	$leftpos = -1; # reset left data
762	0					0	$leftidx = -1;
763							} # right brace
764							} # left brace
765							else {
766
767	1114					1835	my $rightpos = _findsymbol($node->{content}, $right, $pos);
768
769	1114	50				2307	if ($rightpos != -1) {
770	0					0	my $startpos = $node->{start}; # get text start position
771	0					0	$error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$right' at " . ($startpos + $rightpos));
772							}
773							}
774							} # if TEXT
775
776							# recur, but not into verbatim environments!
777							#
778							elsif ($node->{children} &&
779							!(
780							($node->{type} eq 'COMMAND' && $node->{command} =~ /^verb/) \|\|
781							($node->{type} eq 'ENVIRONMENT' && $node->{class} =~ /^verbatim/))) {
782
783	414	50				757	if ($LaTeX::TOM::DEBUG) {
784	0					0	my $message = "Recurring into $node->{type} node ";
785	0	0				0	$message .= $node->{command} if ($node->{type} eq 'COMMAND');
786	0	0				0	$message .= $node->{class} if ($node->{type} eq 'ENVIRONMENT');
787	0					0	_debug($message, undef);
788							}
789
790	414					1057	$parser->_stage5_r($node->{children}, $left, $right, $caremath);
791							}
792
793							} # loop over text blocks
794
795	554	50				1278	if ($leftpos != -1) {
796	0					0	my $startpos = $tree->{nodes}[$leftidx]->{start}; # get text start position
797	0					0	$error_handlers{$parser->{PARSE_ERRORS_FATAL}}->("unmatched '$left' at " . ($startpos + $leftpos));
798							}
799							}
800
801							# This stage propegates the math mode flag and plaintext flags downward.
802							#
803							# After this is done, we can make the claim that only text nodes marked with
804							# the plaintext flag should be printed. math nodes will have the "math" flag,
805							# and also plantext = 0.
806							#
807							sub _propegateModes {
808	190			190		262	my $parser = shift;
809
810	190					237	my $tree = shift;
811	190					261	my $math = shift; # most likely want to call this with 0
812	190					264	my $plaintext = shift; # ditto this-- default to nothing visible.
813
814	190					257	foreach my $node (@{$tree->{nodes}}) {
	190					352
815
816							# handle text nodes on this level. set flags.
817							#
818	536	100				945	if ($node->{type} eq 'TEXT') {
		50
819	384					534	$node->{math} = $math;
820	384					648	$node->{plaintext} = $plaintext;
821							}
822
823							# propegate flags downward, possibly modified
824							#
825							elsif (defined $node->{children}) {
826
827	152					212	my $mathflag = $math; # math propegates down by default
828	152					189	my $plaintextflag = 0; # plaintext flag does NOT propegate by default
829
830							# handle math or plain text forcing envs
831							#
832	152	100	100			508	if ($node->{type} eq 'ENVIRONMENT' \|\| $node->{type} eq 'COMMAND') {
		50
833	148	50	33			1317	if (defined $node->{class} && (
		100	66
			66
			66
			33
			66
			66
834							$parser->{config}{MATHENVS}->{$node->{class}} \|\|
835							$parser->{config}{MATHENVS}->{"$node->{class}*"})
836							)
837							{
838	0					0	$mathflag = 1;
839	0					0	$plaintextflag = 0;
840							}
841							elsif (($node->{type} eq 'COMMAND' &&
842							($parser->{config}{TEXTENVS}->{$node->{command}} \|\|
843							$parser->{config}{TEXTENVS}->{"$node->{command}*"})) \|\|
844							($node->{type} eq 'ENVIRONMENT' &&
845							($parser->{config}{TEXTENVS}->{$node->{class}} \|\|
846							$parser->{config}{TEXTENVS}{"$node->{command}*"}))
847							) {
848
849	80					124	$mathflag = 0;
850	80					107	$plaintextflag = 1;
851							}
852							}
853
854							# groupings change nothing
855							#
856							elsif ($node->{type} eq 'GROUP') {
857	4					8	$mathflag = $math;
858	4					5	$plaintextflag = $plaintext;
859							}
860
861							# recur
862	152					396	$parser->_propegateModes($node->{children}, $mathflag, $plaintextflag);
863							}
864							}
865							}
866
867							# apply a mapping to text nodes in a tree
868							#
869							# for newcommands and defs: mapping is a hash:
870							#
871							# {name, nparams, template, type}
872							#
873							# name is a string
874							# nparams is an integer
875							# template is a tree fragement containing text nodes with #x flags, where
876							# parameters will be replaced.
877							# type is "command"
878							#
879							# for newenvironments:
880							#
881							# {name, nparams, btemplate, etemplate, type}
882							#
883							# same as above, except type is "environment" and there are two templates,
884							# btemplate and etemplate.
885							#
886							sub _applyMapping {
887	12			12		22	my $parser = shift;
888
889	12					14	my $tree = shift;
890	12					24	my $mapping = shift;
891	12		100			27	my $i = shift \|\| 0; # index to start with, in tree.
892
893	12					18	my $applications = 0; # keep track of # of applications
894
895	12					18	for (; $i < @{$tree->{nodes}}; $i++) {
	54					109
896
897	42					58	my $node = $tree->{nodes}[$i];
898
899							# begin environment nodes
900							#
901	42	100	100			322	if ($node->{type} eq 'COMMAND'
		100	100
		100	100
		100	100
		100	100
			66
			100
902							&& $node->{command} eq 'begin'
903							&& $node->{children}->{nodes}[0]->{content} eq $mapping->{name}
904							) {
905							# grab the nparams next group nodes as parameters
906							#
907	1					11	my @params = ();
908
909	1					2	my $remain = $mapping->{nparams};
910	1					2	my $j = 1;
911	1		33			10	while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {
	0					0
912
913	0					0	my $node = $tree->{nodes}[$i + $j];
914
915							# grab group node
916	0	0				0	if ($node->{type} eq 'GROUP') {
917	0					0	push @params, $node->{children};
918	0					0	$remain--;
919							}
920
921	0					0	$j++;
922							}
923
924							# if we didn't get enough group nodes, bomb out
925	1	50				6	next if $remain;
926
927							# otherwise make new subtree
928	1					10	my $applied = _applyParamsToTemplate($mapping->{btemplate}, @params);
929
930							# splice in the result
931	1					2	splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}};
	1					3
	1					4
932
933							# skip past all the new stuff
934	1					3	$i += scalar @{$applied->{nodes}} - 1;
	1					5
935							}
936
937							# end environment nodes
938							#
939							elsif ($node->{type} eq 'COMMAND'
940							&& $node->{command} eq 'end'
941							&& $node->{children}->{nodes}[0]->{content} eq $mapping->{name}
942							) {
943							# make new subtree (no params)
944	1					5	my $applied = $mapping->{etemplate}->copy();
945
946							# splice in the result
947	1					2	splice @{$tree->{nodes}}, $i, 1, @{$applied->{nodes}};
	1					10
	1					12
948
949							# skip past all the new stuff
950	1					2	$i += scalar @{$applied->{nodes}} - 1;
	1					7
951
952	1					3	$applications++; # only count end environment nodes
953							}
954
955							# newcommand nodes
956							#
957							elsif ($node->{type} eq 'COMMAND'
958							&& $node->{command} eq $mapping->{name}
959							&& $mapping->{nparams}
960							) {
961	1					3	my @params = ();
962
963							# children of COMMAND node will be first parameter
964	1					2	push @params, $node->{children};
965
966							# find next nparams GROUP nodes and push their children onto @params
967	1					2	my $remain = $mapping->{nparams} - 1;
968	1					2	my $j = 1;
969	1		33			4	while ($remain > 0 && ($i + $j) < scalar @{$tree->{nodes}}) {
	0					0
970
971	0					0	my $node = $tree->{nodes}[$i + $j];
972
973							# grab group node
974	0	0				0	if ($node->{type} eq 'GROUP') {
975	0					0	push @params, $node->{children};
976	0					0	$remain--;
977							}
978
979	0					0	$j++;
980							}
981
982							# if we didn't get enough group nodes, bomb out
983	1	50				3	next if ($remain > 0);
984
985							# apply the params to the template
986	1					3	my $applied = _applyParamsToTemplate($mapping->{template}, @params);
987
988							# splice in the result
989	1					2	splice @{$tree->{nodes}}, $i, $j, @{$applied->{nodes}};
	1					2
	1					3
990
991							# skip past all the new stuff
992	1					1	$i += scalar @{$applied->{nodes}} - 1;
	1					11
993
994	1					6	$applications++;
995							}
996
997							# find 0-param mappings
998							elsif ($node->{type} eq 'TEXT' && !$mapping->{nparams}) {
999
1000	15					25	my $text = $node->{content};
1001	15					19	my $command = $mapping->{name};
1002
1003							# find occurrences of the mapping command
1004							#
1005	15	50				41	my $wordend = ($command =~ /\w$/ ? 1 : 0);
1006	15		33			113	while (($wordend && $text =~ /\\\Q$command\E(\W\|$)/g) \|\|
			33
			33
1007							(!$wordend && $text =~ /\\\Q$command\E/g)) {
1008
1009	0					0	_debug("found occurrence of mapping $command", undef);
1010
1011	0					0	my $idx = index $node->{content}, '\\' . $command, 0;
1012
1013							# split the text node at that command
1014	0					0	my ($leftnode, $rightnode) = $node->split($idx, $idx + length($command));
1015
1016							# copy the mapping template
1017	0					0	my $applied = $mapping->{template}->copy();
1018
1019							# splice the new nodes in
1020	0					0	splice @{$tree->{nodes}}, $i, 1, $leftnode, @{$applied->{nodes}}, $rightnode;
	0					0
	0					0
1021
1022							# adjust i so we end up on rightnode when we're done
1023	0					0	$i += scalar @{$applied->{nodes}} + 1;
	0					0
1024
1025							# get the next node
1026	0					0	$node = $tree->{nodes}[$i];
1027
1028							# count application
1029	0					0	$applications++;
1030							}
1031							}
1032
1033							# recur
1034							elsif ($node->{children}) {
1035
1036	10					35	$applications += $parser->_applyMapping($node->{children}, $mapping);
1037							}
1038							}
1039
1040	12					30	return $applications;
1041							}
1042
1043							# find and apply all mappings in the tree, progressively and recursively.
1044							# a mapping applies to the entire tree and subtree consisting of nodes AFTER
1045							# itself in the level array.
1046							#
1047							sub _applyMappings {
1048	14			14		20	my $parser = shift;
1049
1050	14					20	my $tree = shift;
1051
1052	14					21	for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
	63					120
1053
1054	49					69	my $prev = $tree->{nodes}[$i-1];
1055	49					67	my $node = $tree->{nodes}[$i];
1056
1057							# find newcommands
1058	49	100	100			313	if ($node->{type} eq 'COMMAND' &&
		100	100
		50	66
		100	66
			33
1059							$node->{command} =~ /^(re)?newcommand$/) {
1060
1061	1					4	my $mapping = _makeMapping($tree, $i);
1062	1	50				4	next if (!$mapping->{name}); # skip fragged commands
1063
1064	1	50				5	if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
1065	1					5	_debug("applying (nc) mapping $mapping->{name}", undef);
1066							} else {
1067	0					0	_debug("NOT applying (nc) mapping $mapping->{name}", undef);
1068	0					0	next;
1069							}
1070
1071							# add to mappings list
1072							#
1073	1					9	$parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;
1074
1075	1					8	_debug("found a mapping with name $mapping->{name}, $mapping->{nparams} params", undef);
1076
1077							# remove the mapping declaration
1078							#
1079	1					3	splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;
	1					4
1080
1081							# apply the mapping
1082	1					5	my $count = $parser->_applyMapping($tree, $mapping, $i);
1083
1084	1	50				3	if ($count > 0) {
1085	1			0		5	_debug("printing altered subtree", sub { $tree->_warn() });
	0					0
1086							}
1087
1088	1					7	$i--; # since we removed the cmd node, check this index again
1089							}
1090
1091							# handle "\newenvironment" mappings
1092							elsif ($node->{type} eq 'COMMAND' &&
1093							$node->{command} =~ /^(re)?newenvironment$/) {
1094
1095							# make a mapping hash
1096							#
1097	1					5	my $mapping = $parser->_makeEnvMapping($tree, $i);
1098	1	50				3	next if (!$mapping->{name}); # skip fragged commands.
1099
1100	1					6	_debug("applying (ne) mapping $mapping->{name}", undef);
1101
1102							# remove the mapping declaration
1103							#
1104	1					2	splice @{$tree->{nodes}}, $i, $mapping->{skip} + 1;
	1					6
1105
1106							# apply the mapping
1107							#
1108	1					4	my $count = $parser->_applyMapping($tree, $mapping, $i);
1109							}
1110
1111							# handle "\def" stype commands.
1112							elsif ($node->{type} eq 'COMMAND' &&
1113							defined $prev &&
1114							$prev->{type} eq 'TEXT' &&
1115							$prev->{content} =~ /\\def\s*$/o) {
1116
1117	0					0	_debug("found def style mapping $node->{command}", undef);
1118
1119							# remove the \def
1120	0					0	$prev->{content} =~ s/\\def\s*$//o;
1121
1122							# make the mapping
1123							my $mapping = {name => $node->{command},
1124							nparams => 0,
1125	0					0	template => $node->{children}->copy(),
1126							type => 'command'};
1127
1128	0	0				0	next if (!$mapping->{name}); # skip fragged commands
1129
1130	0	0				0	if ($parser->{USED_COMMANDS}->{$mapping->{name}}) {
1131	0					0	_debug("applying (def) mapping $mapping->{name}", undef);
1132							} else {
1133	0					0	_debug("NOT applying (def) mapping $mapping->{name}", undef);
1134	0					0	next;
1135							}
1136
1137							# add to mappings list
1138							#
1139	0					0	$parser->{MAPPEDCMDS}->{"\\$mapping->{name}"} = 1;
1140
1141	0			0		0	_debug("template is", sub { $mapping->{template}->_warn() });
	0					0
1142
1143							# remove the command node
1144	0					0	splice @{$tree->{nodes}}, $i, 1;
	0					0
1145
1146							# apply the mapping
1147	0					0	my $count = $parser->_applyMapping($tree, $mapping, $i);
1148
1149	0					0	$i--; # check this index again
1150							}
1151
1152							# recur
1153							elsif ($node->{children}) {
1154
1155	12					27	$parser->_applyMappings($node->{children});
1156							}
1157							}
1158							}
1159
1160							# read files from \input commands and place into the tree, parsed
1161							#
1162							# also include bibliographies
1163							#
1164							sub _addInputs {
1165	89			89		129	my $parser = shift;
1166
1167	89					114	my $tree = shift;
1168
1169	89					111	for (my $i = 0; $i < @{$tree->{nodes}}; $i++) {
	326					645
1170
1171	237					302	my $node = $tree->{nodes}[$i];
1172
1173	237	100	100			875	if ($node->{type} eq 'COMMAND'
		100	100
1174							&& $node->{command} eq 'input'
1175							) {
1176	7					17	my $file = $node->{children}->{nodes}[0]->{content};
1177	7	100				23	next if $file =~ /pstex/; # ignore pstex images
1178
1179	6					26	_debug("reading input file $file", undef);
1180
1181	6					10	my $contents;
1182	6					124	my $filename = fileparse($file);
1183	6					32	my $has_extension = qr/\.\S+$/;
1184
1185							# read in contents of file
1186	6	100	66			216	if (-e $file && $filename =~ $has_extension) {
		50
1187	5					21	$contents = _readFile($file);
1188							}
1189							elsif ($filename !~ $has_extension) {
1190	1					6	$file = "$file.tex";
1191	1	50				20	$contents = _readFile($file) if -e $file;
1192							}
1193
1194							# dump Psfig/TeX files, they aren't useful to us and have
1195							# nonconforming syntax. Use declaration line as our heuristic.
1196							#
1197	6	100	66			46	if (defined $contents
1198							&& $contents =~ m!^ \% \s? Psfig/TeX \s $!mx
1199							) {
1200	1					3	undef $contents;
1201	1					216	carp "ignoring Psfig input `$file'";
1202							}
1203
1204							# actually do the parse of the sub-content
1205							#
1206	6	100				106	if (defined $contents) {
1207							# parse into a tree
1208	5					21	my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});
1209
1210							# replace \input command node with subtree
1211	5					13	splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}};
	5					11
	5					13
1212
1213							# step back
1214	5					22	$i--;
1215							}
1216							}
1217							elsif ($node->{type} eq 'COMMAND'
1218							&& $node->{command} eq 'bibliography'
1219							) {
1220							# try to find a .bbl file
1221							#
1222	1					100	foreach my $file (<*.bbl>) {
1223
1224	1					6	my $contents = _readFile($file);
1225
1226	1	50				8	if (defined $contents) {
1227
1228	1					7	my ($subtree,) = $parser->_basicparse($contents, $parser->{PARSE_ERRORS_FATAL});
1229	1					6	splice @{$tree->{nodes}}, $i, 1, @{$subtree->{nodes}};
	1					4
	1					4
1230	1					4	$i--;
1231							}
1232							}
1233							}
1234
1235							# recur
1236	236	100				468	if ($node->{children}) {
1237	71					157	$parser->_addInputs($node->{children});
1238							}
1239							}
1240							}
1241
1242							# do pre-mapping parsing
1243							#
1244							sub _parseA {
1245	27			27		46	my $parser = shift;
1246	27					49	my $text = shift;
1247
1248	27					66	my $tree = $parser->_stage1($text);
1249	27					87	my $bracehash = $parser->_stage2($tree);
1250
1251	27					94	$parser->_stage3($tree);
1252
1253	27	100				97	$parser->_addInputs($tree) if $parser->{readinputs};
1254
1255	27					102	return ($tree, $bracehash);
1256							}
1257
1258							# do post-mapping parsing (make environments)
1259							#
1260							sub _parseB {
1261	27			27		66	my $parser = shift;
1262	27					50	my $tree = shift;
1263
1264	27					93	$parser->_stage4($tree);
1265
1266	27					84	_debug("done with parseStage4", undef);
1267
1268	27					82	$parser->_stage5($tree, 0);
1269
1270	27					96	_debug("done with parseStage5", undef);
1271							}
1272
1273							###############################################################################
1274							#
1275							# Parser "Static" Subroutines
1276							#
1277							###############################################################################
1278
1279							# find next unescaped char in some text
1280							#
1281							sub _uindex {
1282	0			0		0	my $text = shift;
1283	0					0	my $char = shift;
1284	0					0	my $pos = shift;
1285
1286	0					0	my $realbrace = 0;
1287	0					0	my $idx = -1;
1288
1289							# get next opening brace
1290	0					0	do {
1291	0					0	$realbrace = 1;
1292	0					0	$idx = index $text, $char, $pos;
1293
1294	0	0				0	if ($idx != -1) {
1295	0					0	$pos = $idx + 1;
1296	0					0	my $prevchar = substr $text, $idx - 1, 1;
1297	0	0				0	if ($prevchar eq '\\') {
1298	0					0	$realbrace = 0;
1299	0					0	$idx = -1;
1300							}
1301							}
1302							} while (!$realbrace);
1303
1304	0					0	return $idx;
1305							}
1306
1307							sub _find {
1308	3006			3006		4956	my ($text, $symbol, $pos) = @_;
1309
1310	3006					3916	my ($found, $index);
1311
1312							# get next occurrence of the symbol
1313	3006					3577	do {
1314	3006					3730	$found = true;
1315	3006					5059	$index = index $text, $symbol, $pos;
1316
1317	3006	50	100			6279	if ($symbol eq '}' && $index - 1 >= 0 && substr($text, $index - 1, 1) eq ' ') {
			66
1318							#$pos = $index + 1;
1319	0					0	$index = -1;
1320							}
1321
1322	3006	100				6032	if ($index != -1) {
1323	460					595	$pos = $index + 1;
1324
1325							# make sure this occurrence isn't escaped. this is imperfect.
1326	460	100				921	my $prev_char = ($index - 1 >= 0) ? (substr $text, $index - 1, 1) : '';
1327	460	100				789	my $pprev_char = ($index - 2 >= 0) ? (substr $text, $index - 2, 1) : '';
1328
1329	460	50	33			1257	if ($prev_char eq '\\' && $pprev_char ne '\\') {
1330	0					0	$found = false;
1331							}
1332							}
1333							} until ($found);
1334
1335	3006					4673	return $index;
1336							}
1337
1338							# support function: find the next occurrence of some symbol which is
1339							# not escaped.
1340							#
1341							sub _findsymbol {
1342	2228			2228		3340	return _find(@_);
1343							}
1344
1345							# support function: find the earliest next brace in some (flat) text
1346							#
1347							sub _findbrace {
1348	389			389		635	my ($text, $pos) = @_;
1349
1350	389					632	my $index_o = _find($text, '{', $pos);
1351	389					651	my $index_c = _find($text, '}', $pos);
1352
1353							# handle all find cases
1354	389	100	100			1799	if ($index_o == -1 && $index_c == -1) {
		100	100
		50	33
			66
1355	145					374	return (-1, '');
1356							}
1357							elsif ($index_c == -1 \|\| ($index_o != -1 && $index_o < $index_c)) {
1358	122					298	return ($index_o, '{');
1359							}
1360							elsif ($index_o == -1 \|\| $index_c < $index_o) {
1361	122					393	return ($index_c, '}');
1362							}
1363							}
1364
1365
1366							# skip "blank nodes" in a tree, starting at some position. will finish
1367							# at the first non-blank node. (ie, not a comment or whitespace TEXT node.
1368							#
1369							sub _skipBlankNodes {
1370	2			2		6	my ($tree, $i) = @_;
1371
1372	2					4	my $node = $tree->{nodes}[$$i];
1373
1374	2		66			16	while ($node->{type} eq 'COMMENT'
			66
1375							\|\| ($node->{type} eq 'TEXT'
1376							&& $node->{content} =~ /^\s*$/s)
1377							) {
1378	1					5	$node = $tree->{nodes}[++$$i];
1379							}
1380							}
1381
1382							# is the passed-in node a valid parameter node? for this to be true, it must
1383							# either be a GROUP or a position = inner command.
1384							#
1385							sub _validParamNode {
1386	2			2		8	my ($node) = @_;
1387
1388	2	50	0			6	if ($node->{type} eq 'GROUP'
			33
1389							\|\| ($node->{type} eq 'COMMAND' && $node->{position} eq 'inner')) {
1390	2					6	return true;
1391							}
1392	0					0	return false;
1393							}
1394
1395							# duplicate a valid param node. This means for a group, copy the child tree.
1396							# for a command, make a new tree with just the command node and its child tree.
1397							#
1398							sub _duplicateParam {
1399	2			2		4	my $parser = shift;
1400	2					3	my $node = shift;
1401
1402	2	50				5	if ($node->{type} eq 'GROUP') {
		0
1403	2					6	return $node->{children}->copy();
1404							}
1405							elsif ($node->{type} eq 'COMMAND') {
1406
1407	0					0	my $subtree = $node->{children}->copy(); # copy child subtree
1408	0					0	my $nodecopy = $node->copy(); # make a new node with old data
1409	0					0	$nodecopy->{children} = $subtree; # set the child pointer to new subtree
1410
1411							# return a new tree with the new node (subtree) as its only element
1412	0					0	return LaTeX::TOM::Tree->_new([$nodecopy]);
1413							}
1414
1415	0					0	return undef;
1416							}
1417
1418							sub _getMapping {
1419	2			2		8	my ($type, $tree, $i) = @_;
1420
1421	2					13	my $node = $tree->{nodes}[$$i];
1422
1423	2	50	33			16	if ($node->{type} ne 'COMMAND'
			33
1424							\|\| ($node->{command} ne "new$type"
1425							&& $node->{command} ne "renew$type")
1426							) {
1427	0					0	return ();
1428							}
1429
1430							# figure out command (first child, text node)
1431	2					7	my $command = $node->{children}->{nodes}[0]->{content};
1432	2	100				9	if ($command =~ /^\s* \\(\S+) \s*$/x) {
1433	1					3	$command = $1;
1434							}
1435
1436	2					5	$node = $tree->{nodes}[++$$i];
1437
1438							# figure out number of params
1439	2					3	my $nparams = 0;
1440	2	50				5	if ($node->{type} eq 'TEXT') {
1441	2					4	my $text = $node->{content};
1442
1443	2	100				7	if ($text =~ /^\s* \[\s* ([0-9]+) \s\] \s$/x) {
1444	1					4	$nparams = $1;
1445							}
1446
1447	2					3	$$i++;
1448							}
1449
1450	2					16	return ($command, $nparams);
1451							}
1452
1453							# make a mapping from a newenvironment fragment
1454							#
1455							# newenvironments have the following syntax:
1456							#
1457							# \newenvironment{name}[nparams]?{beginTeX}{endTeX}
1458							#
1459							sub _makeEnvMapping {
1460	1			1		2	my $parser = shift;
1461	1					3	my ($tree, $index) = @_;
1462
1463	1					2	my $i = $index;
1464
1465	1	50				3	my ($command, $nparams) = _getMapping('environment', $tree, \$i) or return undef;
1466
1467							# default templates-- just repeat the declarations
1468							#
1469	1					9	my ($btemplate) = $parser->_basicparse("\\begin{$command}", 2, 0);
1470	1					5	my ($etemplate) = $parser->_basicparse("\\end{$command}", 2, 0);
1471
1472	1					3	my $end_pos = $i;
1473
1474							# get two group subtrees... one for the begin and one for the end
1475							# templates. we only ignore whitespace TEXT nodes and comments
1476							#
1477	1					6	_skipBlankNodes($tree, \$i);
1478	1					4	my $node = $tree->{nodes}[$i];
1479
1480	1	50				2	if (_validParamNode($node)) {
1481	1					4	$btemplate = $parser->_duplicateParam($node);
1482
1483	1					4	$i++;
1484	1					4	_skipBlankNodes($tree, \$i);
1485	1					3	$node = $tree->{nodes}[$i];
1486
1487	1	50				2	if (_validParamNode($node)) {
1488	1					2	$etemplate = $parser->_duplicateParam($node);
1489	1					4	$end_pos = $i;
1490							}
1491							}
1492
1493							# build and return the mapping hash
1494							#
1495							return {
1496	1					11	type => 'environment',
1497							name => $command,
1498							nparams => $nparams,
1499							btemplate => $btemplate, # begin template
1500							etemplate => $etemplate, # end template
1501							skip => $end_pos - $index,
1502							};
1503							}
1504
1505							# make a mapping from a newcommand fragment
1506							# takes tree pointer and index of command node
1507							#
1508							# newcommands have the following syntax:
1509							#
1510							# \newcommand{\name}[nparams]?{anyTeX}
1511							#
1512							sub _makeMapping {
1513	1			1		4	my ($tree, $index) = @_;
1514
1515	1					2	my $i = $index;
1516
1517	1	50				3	my ($command, $nparams) = _getMapping('command', $tree, \$i) or return undef;
1518
1519							# grab subtree template (array ref)
1520							#
1521	1					12	my $node = $tree->{nodes}[$i];
1522	1					1	my $template;
1523
1524	1	50				6	if ($node->{type} eq 'GROUP') {
1525	1					3	$template = $node->{children}->copy();
1526							}
1527							else {
1528	0					0	return undef;
1529							}
1530
1531							# build and return the mapping hash
1532							#
1533							return {
1534	1					6	type => 'command',
1535							name => $command,
1536							nparams => $nparams,
1537							template => $template,
1538							skip => $i - $index,
1539							};
1540							}
1541
1542							# this sub is the main entry point for the sub that actually takes a set of
1543							# parameter trees and inserts them into a template tree. the return result,
1544							# newly allocated, should be plopped back into the original tree where the
1545							# parameters (along with the initial command invocation)
1546							#
1547							sub _applyParamsToTemplate {
1548	2			2		7	my $template = shift;
1549	2					5	my @params = @_;
1550
1551							# have to copy the template to a freshly allocated tree
1552							#
1553	2					6	my $applied = $template->copy();
1554
1555							# now recursively apply the params.
1556							#
1557	2					77	_applyParamsToTemplate_r($applied, @params);
1558
1559	2					5	return $applied;
1560							}
1561
1562							# recursive helper for above
1563							#
1564							sub _applyParamsToTemplate_r {
1565	4			4		8	my $template = shift;
1566	4					9	my @params = @_;
1567
1568	4					6	for (my $i = 0; $i < @{$template->{nodes}}; $i++) {
	13					28
1569
1570	9					14	my $node = $template->{nodes}[$i];
1571
1572	9	100				25	if ($node->{type} eq 'TEXT') {
		50
1573
1574	7					11	my $text = $node->{content};
1575
1576							# find occurrences of the parameter flags
1577							#
1578	7	100				19	if ($text =~ /(#([0-9]+))/) {
1579
1580	1					3	my $all = $1;
1581	1					3	my $num = $2;
1582
1583							# get the index of the flag we just found
1584							#
1585	1					5	my $idx = index $text, $all, 0;
1586
1587							# split the node on the location of the flag
1588							#
1589	1					5	my ($leftnode, $rightnode) = $node->split($idx, $idx + length($all) - 1);
1590
1591							# make a copy of the param we want
1592							#
1593	1					53	my $param = $params[$num - 1]->copy();
1594
1595							# splice the new text nodes, along with the parameter subtree, into
1596							# the old location
1597							#
1598	1					2	splice @{$template->{nodes}}, $i, 1, $leftnode, @{$param->{nodes}}, $rightnode;
	1					3
	1					4
1599
1600							# skip forward to where $rightnode is in $template on next iteration
1601							#
1602	1					1	$i += scalar @{$param->{nodes}};
	1					5
1603							}
1604							}
1605
1606							# recur
1607							elsif (defined $node->{children}) {
1608
1609	2					8	_applyParamsToTemplate_r($node->{children}, @params);
1610							}
1611							}
1612							}
1613
1614
1615							# This sub takes a chunk of the document text between two points and makes
1616							# it into a list of TEXT nodes and COMMENT nodes, as we would expect from
1617							# '%' prefixed LaTeX comment lines
1618							#
1619							sub _getTextAndCommentNodes {
1620	27			27		64	my ($text, $begins, $ends) = @_;
1621
1622	27					94	my $node_text = substr $text, $begins, $ends - $begins;
1623
1624	27					118	_debug("getTextAndCommentNodes: looking at [$node_text]", undef);
1625
1626							my $make_node = sub {
1627	26			26		67	my ($mode_type, $begins, $start_pos, $output) = @_;
1628
1629	26					238	return LaTeX::TOM::Node->_new({
1630							type => uc $mode_type,
1631							start => $begins + $start_pos,
1632							end => $begins + $start_pos + length($output) - 1,
1633							content => $output,
1634							});
1635	27					133	};
1636
1637	27					112	my @lines = split (/(
1638							(?:\s* # whitespace
1639							(?
1640							\%[^\n]* # comment
1641							\n)+ # newline
1642							)/mx, $node_text);
1643
1644	27					47	my @nodes;
1645
1646	27					42	my $start_pos = 0;
1647	27					44	my $output;
1648							my $mode_type;
1649	27					47	my $first = true;
1650
1651	27					60	foreach my $line (@lines) {
1652
1653	26	50	33			113	my $line_type = (
1654							$line =~ /^\s*\%/
1655							&& $node_text !~ /
1656							\\begin\{verbatim\}
1657							.* \Q$line\E .*
1658							\\end\{verbatim\}
1659							/sx
1660							) ? 'comment' : 'text';
1661
1662							# if type stays the same, add to output and do nothing
1663	26	50	33			95	if ($first \|\| $line_type eq $mode_type) {
1664
1665	26					63	$output .= $line;
1666
1667							# handle turning off initialization stuff
1668	26		50			140	$first &&= false;
1669	26		33			113	$mode_type \|\|= $line_type;
1670							}
1671
1672							# if type changes, make new node from current chunk, change mode type
1673							# and start a new chunk
1674							else {
1675	0					0	push @nodes, $make_node->($mode_type, $begins, $start_pos, $output);
1676
1677	0					0	$start_pos += length($output); # update start position
1678	0					0	$output = $line;
1679
1680	0					0	$mode_type = $line_type;
1681							}
1682							}
1683
1684	27	100				94	push @nodes, $make_node->($mode_type, $begins, $start_pos, $output) if defined $output;
1685
1686	27					168	return @nodes;
1687							}
1688
1689							# Read in the contents of a text file on disk. Return in string scalar.
1690							#
1691							sub _readFile {
1692	16			16		47	my ($file, $raise_error) = @_;
1693
1694	16		100			78	$raise_error \|\|= false;
1695
1696	16					696	my $opened = open(my $fh, '<', $file);
1697
1698	16	50				80	unless ($opened) {
1699	0	0				0	croak "Cannot open `$file': $!" if $raise_error;
1700	0					0	return undef;
1701							}
1702
1703	16					32	my $contents = do { local $/; <$fh> };
	16					82
	16					19094
1704	16					193	close($fh);
1705
1706	16					105	return $contents;
1707							}
1708
1709							sub _debug {
1710	920			920		1554	my ($message, $code) = @_;
1711
1712	920					1204	my $DEBUG = $LaTeX::TOM::DEBUG;
1713
1714	920	50	33			2128	return unless $DEBUG >= 1 && $DEBUG <= 2;
1715
1716	0						my ($filename, $line) = (caller)[1,2];
1717	0						my $caller = join ':', (fileparse($filename))[0], $line;
1718
1719	0	0	0				warn "$caller: $message\n" if $DEBUG >= 1 && defined $message;
1720	0	0	0				$code->() if $DEBUG == 2 && defined $code;
1721							}
1722
1723							1;