File Coverage

blib/lib/Unisyn/Parse.pm

Criterion	Covered	Total	%
statement	49	1074	4.5
branch	2	100	2.0
condition	0	9	0.0
subroutine	15	99	15.1
pod	60	62	96.7
total	126	1344	9.3

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/NasmX86/lib/ -I/home/phil/perl/cpan/AsmC/lib/
2							#-------------------------------------------------------------------------------
3							# Parse a Unisyn expression.
4							# Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5							#-------------------------------------------------------------------------------
6							# podDocumentation
7							# Finished in 13.14s, bytes: 2,655,008, execs: 465,858
8							# Can we remove more Pushr by doing one big save in parseutf8 ?
9							package Unisyn::Parse;
10							our $VERSION = "20211008";
11	1			1		2375	use warnings FATAL => qw(all);
	1					7
	1					34
12	1			1		4	use strict;
	1					2
	1					29
13	1			1		5	use Carp qw(confess cluck);
	1					2
	1					79
14	1			1		431	use Data::Dump qw(dump);
	1					6885
	1					59
15	1			1		3305	use Data::Table::Text qw(:all !parse);
	1					125876
	1					1621
16	1			1		5747	use Nasm::X86 qw(:all);
	1					146055
	1					2705
17	1			1		12	use feature qw(say current_sub);
	1					3
	1					99
18	1			1		6	use utf8;
	1					2
	1					9
19
20							makeDieConfess;
21
22							my $develop = -e q(/home/phil/); # Developing: true if the directory /home/phil/ exists on this machine
23							our $Parse; # One of the advantages of creating a parse tree is that we can perform parses one at a time, making it safe to globalize this variable. The alternative is to pass this variable between all the parsing calls which would obscure their workings greatly.
24							our $ParseUtf8SubDef; # The definition of the subroutine that does the parsing so that we can reuse its parameters - it is passed as "with => $ParseUtf8SubDef" when the subroutine in sub new is defined.
25							our $debug = 0; # Print the evolution of the parse stack on stderr if true.
26
27							#D1 Create # Create a Unisyn parse of a utf8 string.
28
29							sub create($%) # Create a new unisyn parse from a utf8 string and return the description of the parse.
30	0			0	1	0	{my ($address, %options) = @_; # Address of a zero terminated utf8 source string to parse as a variable, parse options.
31	0	0				0	@_ >= 1 or confess "One or more parameters"; # The address of the source string is required
32
33	0					0	my $a = CreateArena; # Arena to hold parse tree - every parse tree gets its own arena so that we can free parses separately
34	0					0	my $size = StringLength string => $address; # Length of input utf8
35
36	0					0	my $p = $Parse = genHash(__PACKAGE__, # Description of parse - also saved in the global $Parse so the parsing subs can reach it
37							arena => $a, # Arena containing tree
38							size8 => $size, # Size of source string as utf8
39							address8 => $address, # Address of source string as utf8
40							source32 => V(source32), # Source text as utf32
41							sourceSize32 => V(sourceSize32), # Size of utf32 allocation
42							sourceLength32 => V(sourceLength32), # Length of utf32 string
43							parse => V('parse'), # Offset to the head of the parse tree
44							fails => V('fail'), # Number of failures encountered in this parse
45							quarks => $a->CreateQuarks, # Quarks representing the strings used in this parse
46							operators => undef, # Methods implementing each lexical operator - only set if the operators option is supplied
47							);
48
49	0	0				0	if (my $o = $options{operators}) # Operator methods for lexical items - a code reference supplied by the caller
50	0					0	{$p->operators = $a->CreateQuarks; # Create quark set to translate operator names to offsets
51	0					0	$o->($p); # Call the supplied code with the parse description - presumably so that it can register the operator subs, confirm against callers
52							}
53
54	0					0	$p->parseUtf8; # Parse utf8 source string
55
56	0					0	$p # Return the description of the parse
57							}
58
59							#D1 Parse # Parse Unisyn expressions
60
61							our $Lex = &lexicalData; # Lexical table definitions
62
63							our $ses = RegisterSize rax; # Size of an element on the stack - used to scale offsets from rsp when addressing stack elements
64							our ($w1, $w2, $w3) = (r8, r9, r10); # Work registers
65							our $prevChar = r11; # The previous character parsed
66							our $index = r12; # Index of current element
67							our $element = r13; # Contains the item being parsed
68							our $start = r14; # Start of the parse string
69							our $size = r15; # Length of the input string
70							our $parseStackBase = rsi; # The base of the parsing stack in the stack
71							#ur $arenaReg = rax; # The arena in which we are building the parse tree
72							our $indexScale = 4; # The size of a utf32 character in bytes
73							our $lexCodeOffset = 3; # The offset in bytes within a classified character to the lexical code.
74							our $bitsPerByte = 8; # The number of bits in a byte
75
76							our $Ascii = $$Lex{lexicals}{Ascii} {number}; # Ascii
77							our $assign = $$Lex{lexicals}{assign} {number}; # Assign
78							our $dyad = $$Lex{lexicals}{dyad} {number}; # Dyad
79							our $CloseBracket = $$Lex{lexicals}{CloseBracket} {number}; # Close bracket
80							our $empty = $$Lex{lexicals}{empty} {number}; # Empty element
81							our $NewLineSemiColon = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semicolon
82							our $OpenBracket = $$Lex{lexicals}{OpenBracket} {number}; # Open bracket
83							our $prefix = $$Lex{lexicals}{prefix} {number}; # Prefix operator
84							our $semiColon = $$Lex{lexicals}{semiColon} {number}; # Semicolon
85							our $suffix = $$Lex{lexicals}{suffix} {number}; # Suffix
86							our $term = $$Lex{lexicals}{term} {number}; # Term
87							our $variable = $$Lex{lexicals}{variable} {number}; # Variable
88							our $WhiteSpace = $$Lex{lexicals}{WhiteSpace} {number}; # White space
89							our $firstSet = $$Lex{structure}{first}; # First symbols allowed
90							our $lastSet = $$Lex{structure}{last}; # Last symbols allowed
91							our $bracketsBase = $$Lex{bracketsBase}; # Base lexical item for brackets
92
93							our $asciiNewLine = ord("\n"); # New line in ascii
94							our $asciiSpace = ord(' '); # Space in ascii
95
96							our $lexItemType = 0; # Field number of lexical item type in the description of a lexical item
97							our $lexItemOffset = 1; # Field number of the offset in the utf32 source of the lexical item in the description of a lexical item or - if this is a term - the offset of the invariant first block of the sub tree
98							our $lexItemLength = 2; # Field number of the length of the lexical item in the utf32 source in the description of a lexical item
99							our $lexItemQuark = 3; # Field number of the quark containing the text of this lexical item.
100							our $lexItemWidth = 4; # The number of fields used to describe a lexical item in the parse tree
101
102							our $opType = 0; # Operator type field - currently always a term
103							our $opCount = 1; # Number of operands for this operator
104							our $opSub = 2; # Offset of sub associated with this lexical item
106							sub getAlpha($$$) #P Load the position of a lexical item in its alphabet from the current character.
107	0			0	1	0	{my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
108	0					0	Mov $register, "[$address+$indexScale*$index]"; # Load from the start of the classified character at this index - unlike getLexicalCode no lexical code offset is applied
109							}
110
111							sub getLexicalCode($$$) #P Load the lexical code of the current character in memory into the specified register.
112	0			0	1	0	{my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
113	0					0	Mov $register, "[$address+$indexScale*$index+$lexCodeOffset]"; # Load lexical code held at the lexical code offset within the character at this index
114							}
115
116							sub putLexicalCode($$$$) #P Put the specified lexical code into the current character in memory.
117	0			0	1	0	{my ($register, $address, $index, $code) = @_; # Register used to load code, address of string, index into string, code to put
118	0					0	Mov $register, $code; # Stage the code in the supplied register
119	0					0	Mov "[$address+$indexScale*$index+$lexCodeOffset]", $register; # Save lexical code at the lexical code offset within the character at this index
120							}
121
122							sub loadCurrentChar() #P Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
123	0			0	1	0	{my $r = $element."b"; # Classification byte - the byte sized name of the element register
124
125	0					0	Mov $element, $index; # Load index of character as upper dword
126	0					0	Shl $element, $indexScale * $bitsPerByte; # Save the index of the character in the upper half of the register so that we know where the character came from.
127	0					0	getLexicalCode $r, $start, $index; # Load lexical classification as lowest byte
128
129	0					0	Cmp $r, $bracketsBase; # Brackets, due to their frequency, start after 0x10 with open even and close odd
130							IfGe # Brackets
131							Then
132	0			0		0	{And $r, 1 # Bracket: 0 - open, 1 - close - only the lowest bit of the bracket code is kept
133							},
134							Else
135	0			0		0	{Cmp $r, $Ascii; # Ascii is a type of variable
136							IfEq
137							Then
138	0					0	{Mov $r, $variable; # Fold ascii into variable
139							},
140							Else
141	0					0	{Cmp $r, $NewLineSemiColon; # New line semicolon is a type of semi colon
142							IfEq
143							Then
144	0					0	{Mov $r, $semiColon; # Fold new line semicolon into semicolon
145	0					0	};
146	0					0	};
147	0					0	};
148							}
149
150							sub checkStackHas($) #P Check that we have at least the specified number of elements on the stack. The result is left in the flags for the caller to test: callers in this file follow with IfGe, IfNe or Jl.
151	0			0	1	0	{my ($depth) = @_; # Number of elements required on the stack
152	0					0	Mov $w1, $parseStackBase; # Base of the parse stack
153	0					0	Sub $w1, rsp; # Distance from the base of the parse stack to the current stack pointer
154	0					0	Cmp $w1, $ses * $depth; # Compare with the space needed for the specified number of elements
155							}
156
157							sub pushElement() #P Push the current element on to the stack.
158	0			0	1	0	{Push $element; # The element register holds the item being parsed
159	0	0				0	if ($debug) # Trace the push on stderr
160	0					0	{PrintErrStringNL "Push Element:";
161	0					0	PrintErrRegisterInHex $element;
162							}
163							}
164
165							sub pushEmpty() #P Push the empty element on to the stack.
166	0			0	1	0	{Mov $w1, $index; # Index of the current character
167	0					0	Shl $w1, $indexScale * $bitsPerByte; # Move the index into the upper half of the register in the same way as loadCurrentChar
168	0					0	Or $w1, $empty; # Mark as the empty lexical item in the low part of the register
169	0					0	Push $w1; # Place the empty element on the stack
170	0	0				0	if ($debug) # Trace the push on stderr
171	0					0	{PrintErrStringNL "Push Empty";
172							}
173							}
174
175							sub lexicalNameFromLetter($) #P Lexical name for a lexical item described by its letter. Confesses if the letter is not known.
176	0			0	1	0	{my ($l) = @_; # Letter of the lexical item
177	0					0	my %l = $Lex->{treeTermLexicals}->%*; # Copy of the table of lexical items keyed by letter
178	0					0	my $n = $l{$l}; # Entry for this letter
179	0	0				0	confess "No such lexical: $l" unless $n; # Complain about an unknown letter
180							$n->{short} # Return the "short" field of the entry which is used as the lexical name
181	0					0	}
182
183							sub lexicalNumberFromLetter($) #P Lexical number for a lexical item described by its letter. Confesses if the letter or the resulting name is not known.
184	0			0	1	0	{my ($l) = @_; # Letter of the lexical item
185	0					0	my $n = lexicalNameFromLetter $l; # Name of the lexical item
186	0					0	my $N = $Lex->{lexicals}{$n}{number}; # Number of the lexical item with that name
187	0	0				0	confess "No such lexical named: $n" unless defined $N; # Zero is acceptable as a number so test for defined rather than truth
188	0					0	$N # Return the number
189							}
190
191							sub lexicalItemLength($$) #P Put the length of a lexical item into a variable and return that variable.
192	0			0	1	0	{my ($source32, $offset) = @_; # Variable addressing the utf32 source representation, variable holding the offset to the lexical item in utf32
193
194							my $s = Subroutine
195	0			0		0	{my ($p, $s) = @_; # Parameters
196							# PushR r14, r15; # We do not need to save the zmm and mask registers because they are only used as temporary work registers and they have been saved in L
197
198	0					0	$$p{source32}->setReg(r14); # Address of the utf32 source
199	0					0	$$p{offset} ->setReg(r15); # Offset of the lexical item in characters
200	0					0	Vmovdqu8 zmm0, "[r14+4*r15]"; # Load source to examine
201	0					0	Pextrw r15, xmm0, 1; # Extract the second word of the first element which contains the lexical type
202
203							OrBlock # The size of a bracket or a semi colon is always 1
204	0					0	{my ($pass, $end, $start) = @_;
205	0					0	Cmp r15, $OpenBracket;
206	0					0	Je $pass;
207	0					0	Cmp r15, $CloseBracket;
208	0					0	Je $pass;
209	0					0	Cmp r15, $semiColon;
210	0					0	Je $pass;
211
212	0					0	Vpbroadcastw zmm1, r15w; # Broadcast lexical type
213	0					0	Vpcmpeqw k0, zmm0, zmm1; # Check extent of first lexical item up to 16
214	0					0	Mov r15, 0x55555555; # Mask with bits 0, 2, 4 ... set: these are the first word of each character where we know the match will fail
215	0					0	Kmovq k1, r15;
216	0					0	Korq k2, k0, k1; # Fill in the positions known to fail so that each matching character shows as two consecutive ones
217
218	0					0	Kmovq r15, k2;
219	0					0	Not r15; # Swap zeroes and ones
220	0					0	Tzcnt r15, r15; # Trailing zero count is a factor two too big
221	0					0	Shr r15, 1; # Normalized count of number of characters in lexical item
222	0					0	$$p{size}->getReg(r15); # Save size in supplied variable
223							}
224							Pass # Show unitary length
225	0					0	{my ($end, $pass, $start) = @_; # NOTE(review): the order of these names differs from the OrBlock body above - harmless here as none of them are used
226	0					0	$$p{size}->getConst(1); # Save size in supplied variable
227	0					0	};
228
229							# PopR;
230	0					0	} [qw(offset source32 size)],
231							name => q(Unisyn::Parse::lexicalItemLength);
232
233	0					0	$s->call(offset => $offset, source32 => $source32, my $size = V(size)); # Call the subroutine with a new variable to receive the size
234
235	0					0	$size # Return the variable containing the size
236							}
237
238							sub new($$) #P Create a new term in the parse tree rooted on the stack: the specified number of elements on top of the stack are replaced by a single term element.
239	0			0	1	0	{my ($depth, $description) = @_; # Stack depth to be converted, text reason why we are creating a new term
240
241	0					0	my $wr = RegisterSize rax; # Width of general purpose register
242
243							my $s = Subroutine
244	0			0		0	{my ($locals) = @_; # Parameters
245	0					0	my $a = DescribeArena $$locals{bs}; # Address arena
246
247							my $quarks = $Parse->quarks->reload(arena => $$locals{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
248							array => $$locals{numbersToStringsFirst},
249	0					0	tree => $$locals{stringsToNumbersFirst});
250
251							my $operators = $Parse->operators ? $Parse->operators->reload # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
252							(arena => $$locals{bs},
253							array => $$locals{opNumbersToStringsFirst},
254	0	0				0	tree => $$locals{opStringsToNumbersFirst}) : undef; # No operators were supplied for this parse
255
256	0					0	my $t = $a->CreateTree; # Create a tree in the arena to hold the details of the lexical elements on the stack
257	0					0	my $o = V(offset); # Offset into source for lexical item
258	0					0	$t->insert(V(key, $opType), K(data, $term)); # Create a term - we only have terms at the moment in the parse tree - but that might change in the future
259	0					0	$t->insert(V(key, $opCount), K(data, $depth)); # The number of elements in the term which is the number of operands for the operator
260
261	0					0	my $liOnStack = $w1; # The lexical item as it appears on the stack
262	0					0	my $liType = $w2; # The lexical item type
263	0					0	my $liOffset = $w3; # The lexical item offset in the source
264
265	0					0	PushR zmm0; # Put the simulated stack on the stack
266
267	0					0	for my $i(1..$depth) # Each element being folded into the new term
268	0					0	{my $j = $depth + 1 - $i; # Number of this element within the term counting down from the depth to 1
269	0					0	my $k = ($i - 1) * $wr; # Position in simulated stack
270	0					0	Mov $liOnStack, "[rsp+$k]"; # Copy term out of simulated stack
271	0	0				0	PrintErrRegisterInHex $liOnStack if $debug;
272
273	0					0	Mov $liOffset, $liOnStack; # Offset of either the text in the source or the offset of the first block of the tree describing a term
274	0					0	Shr $liOffset, 32; # Offset in source: either the actual text or the offset of the first block of the tree containing a term shifted over to look as if it were an offset in the source
275	0					0	$o->getReg($liOffset); # Offset of lexical item in source or offset of first block in tree describing a term
276
277	0					0	ClearRegisters $liType;
278	0					0	Mov $liType."b", $liOnStack."b"; # The lexical item type in the lowest byte, the rest clear.
279
280	0					0	Cmp $liType, $term; # Check whether the lexical item on the stack is a term
281							IfEq # Insert a sub tree if we are inserting a term
282							Then
283	0					0	{$t->insertTree(K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset of first block in the tree representing the term
284							},
285							Else # Insert the offset in the utf32 source if we are not on a term
286	0					0	{$t->insert (K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset in source of non term
287	0					0	};
288
289	0					0	Cmp $liType, $variable; # Check whether the lexical item is a variable which can also represent ascii
290							IfEq # Reload the actual lexical type from the source if we are on a variable
291							Then
292	0					0	{Mov $liType."b", "[$start+4*$liOffset+3]"; # Load lexical type from source
293	0					0	};
294
295	0					0	Cmp $liType, $term; # Length of lexical item that is not a term
296							IfNe
297							Then # Not a term
298	0					0	{my $size = lexicalItemLength(V(address, $start), $o); # Get the size of the lexical item at the offset indicated on the stack
299	0					0	$t->insert(V(key, $lexItemWidth * $j + $lexItemLength), $size); # Save size of lexical item in parse tree
300
301	0					0	my $s = CreateShortString(1); # Short string to hold text of lexical item so we can load it into a quark
302	0					0	$s->clear; # Perhaps not strictly necessary but easier to debug
303	0					0	PushR r15; # Probably not needed as saved in L
304	0	0	0			0	r15 ne $start && r15 ne $liOffset or confess "r15 in use"; # Check at assembly time that r15 is free to use as a work register here
305	0					0	Lea r15, "[$start+4*$liOffset]"; # Start address of lexical item
306	0					0	my $startAddress = V(address, r15); # Save start address of lexical item
307	0					0	PopR;
308
309	0					0	Cmp $liType, $OpenBracket; # Is it a bracket ?
310							IfEq
311							Then
312	0					0	{ClearRegisters $liType; # Compute lexical type of bracket by adding bracket number to the start of the bracket range
313	0					0	Mov $liType."b", "[$start+4*$liOffset+3]"; # Load bracket number
314	0					0	Shl $liType, 16; # Shift bracket base into position
315	0					0	Add $liType, 2; # Set length of short string as two = (lexical type, bracket number)
316	0					0	Pinsrd "xmm1", $liType."d", 0; # Load short string
317	0					0	Shr $liType, 16; # Move lexical type back into position for insertion into the parse tree
318							},
319							Else
320	0					0	{$s->loadDwordBytes(0, $startAddress, $size, 1); # Load text of lexical item into short string leaving space for lexical type
321	0					0	Pinsrb "xmm1", $liType."b", 1; # Set lexical type as the first byte of the short string
322	0					0	};
323
324	0					0	my $q = $quarks->quarkFromShortString($s); # Find the quark matching the lexical item if there is such a quark
325	0					0	$t->insert(V(key, $lexItemWidth * $j + $lexItemQuark), $q); # Save quark number of lexical item in parse tree
326	0	0				0	if ($operators) # The parse has operator definitions
327	0	0				0	{if ($j == 1) # The operator quark is always first
328							{OrBlock # Like an operator or like a variable?
329	0					0	{my ($pass, $end, $start) = @_;
330	0					0	Cmp $liType, $variable;
331	0					0	Je $pass; # Process a variable
332	0					0	Cmp $liType, $Ascii;
333	0					0	Je $pass; # Process ascii constant
334	0					0	Cmp $liType, $semiColon;
335	0					0	Je $pass; # Process Semicolon
336	0					0	Cmp $liType, $NewLineSemiColon;
337	0					0	Je $pass; # Process new line semicolon
338							# Process non variable, i.e. operators specifically
339	0					0	my $N = $operators->subFromQuarkViaQuarks($quarks, $q); # Look up the subroutine associated with this operator
340							If $N >= 0, # Found a matching operator subroutine
341							Then
342	0					0	{$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
343	0					0	};
344							}
345							Pass # Process variables in general or items based on variables using a short string of length 1 being the lexical type of the item in question
346	0					0	{Shl $liType, 8; # Move lexical type into second byte
347	0					0	Inc $liType; # Show length
348	0					0	Pinsrq "xmm1", $liType, 0; # Load short string
349	0					0	my $N = $operators->subFromShortString($s); # Address of sub to process variable or ascii or semicolon
350	0					0	Shr $liType, 8; # Restore lexical type
351							If $N >= 0, # Found a matching operator subroutine
352							Then
353	0					0	{$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
354	0					0	};
355	0					0	};
356							}
357							}
358	0					0	};
359
360	0					0	$t->insert (V(key, $lexItemWidth * $j + $lexItemType), # Save lexical type in parse tree
361							V(data)->getReg($liType));
362							}
363							# Push new term onto the stack in place of the items popped off
364	0					0	$t->first->setReg($liOffset); # Offset of new term tree
365	0					0	Shl $liOffset, 32; # Push offset to term tree into the upper dword to make it look like a source offset
366	0					0	Or $liOffset."b", $term; # Mark as a term tree
367	0					0	$$locals{new}->getReg($liOffset); # New term comprised of a tree of old terms
368	0					0	PopR; # Restore stack to its position at the start
369							}
370	0					0	[qw(new)], with => $ParseUtf8SubDef, # The only explicit parameter is the new term - the remaining parameters are reused from the parsing subroutine definition
371							# [qw(bs new
372							# numbersToStringsFirst stringsToNumbersFirst
373							# opNumbersToStringsFirst opStringsToNumbersFirst
374							# )],
375							name=>"Unisyn::Parse::new_$depth"; # One subroutine is generated per depth
376
377	0	0				0	PrintErrStringNL "New: $description" if $debug;
378
379	0	0				0	if ($depth == 1) {Mov $w1, 1} # Copy the top of the real stack which holds the parse state to zmm0 so that we can adjust the stack to call L - the mask chosen here selects one quadword per element
	0	0				0
380	0					0	elsif ($depth == 2) {Mov $w1, 3} # Mask for the lowest two quadwords
381	0					0	else {Mov $w1, 7} # Mask for the lowest three quadwords
382	0					0	Kmovq k1, $w1; # B is saved in L
383	0					0	Vmovdqu64 "zmm0{k1}", "[rsp]"; # Copy top lexical items on stack
384
385							# $s->call(bs => $Parse->arena->bs, my $new = V('new'),
386							# numbersToStringsFirst => $Parse->quarks->numbersToStrings->first,
387							# stringsToNumbersFirst => $Parse->quarks->stringsToNumbers->first,
388							# opNumbersToStringsFirst => $Parse->operators ? $Parse->operators->subQuarks->numbersToStrings->first : 0,
389							# opStringsToNumbersFirst => $Parse->operators ? $Parse->operators->subQuarks->stringsToNumbers->first : 0,
390							# );
391
392	0					0	$s->call(my $new = V('new')); # Build the term with a new variable to receive the resulting stack element
393
394	0					0	$new->setReg($w1); # Save offset of new term in a work register
395	0					0	Add rsp, $depth * $wr; # Remove input terms from stack
396	0					0	Push $w1; # Save new term on stack
397							}
398
399							sub error($) #P Write an error message, the current element and the current index to stdout and stop.
400	0			0	1	0	{my ($message) = @_; # Error message
401	0					0	PrintOutStringNL "Error: $message";
402	0					0	PrintOutString "Element: ";
403	0					0	PrintOutRegisterInHex $element; # The item being parsed when the error was detected
404	0					0	PrintOutString "Index : ";
405	0					0	PrintOutRegisterInHex $index; # The index of the current element when the error was detected
406	0					0	Exit(0); # NOTE(review): the exit code is 0 even though this is the error path - confirm that callers do not rely on a non zero exit code to detect a failed parse
407							}
408
409							sub testSet($$) #P Test a set of items, setting the Zero Flag if one matches else clearing the Zero Flag.
410	0			0	1	0	{my ($set, $register) = @_; # Set of lexical letters, Register to test
411	0					0	my @n = map {sprintf("0x%x", lexicalNumberFromLetter $_)} split //, $set; # Each lexical item by number from letter formatted as a hexadecimal constant
	0					0
412	0					0	my $end = Label; # Jump here with the Zero Flag set by the successful comparison
413	0					0	for my $n(@n) # Compare the lowest byte of the register with each lexical number in turn
414	0					0	{Cmp $register."b", $n;
415	0					0	Je $end # Matched - leave with the Zero Flag set
416							}
417	0					0	ClearZF; # No item in the set matched
418	0					0	SetLabel $end;
419							}
420
421							sub checkSet($) #P Check that one of a set of items is on the top of the stack or complain if it is not.
422	0			0	1	0	{my ($set) = @_; # Set of lexical letters
423	0					0	my @n = map {lexicalNumberFromLetter $_} split //, $set; # Each lexical item by number from letter
	0					0
424	0					0	my $end = Label; # Jump here with the Zero Flag set by the successful comparison
425
426	0					0	for my $n(@n) # Compare the lowest byte of the element on top of the stack with each lexical number in turn
427	0					0	{Cmp "byte[rsp]", $n;
428	0					0	Je $end # Matched - leave with the Zero Flag set
429							}
430	0					0	error("Expected one of: '$set' on the stack"); # No item in the set matched
431	0					0	ClearZF; # NOTE(review): error() ends with Exit so this instruction is presumably never reached at run time - confirm
432	0					0	SetLabel $end;
433							}
434
435							sub reduce($) #P Convert the longest possible expression on top of the stack into a term at the specified priority. The Zero Flag is set if a reduction was made else it is cleared.
436	0			0	1	0	{my ($priority) = @_; # Priority of the operators to reduce
437	0					0	$priority =~ m(\A(1\|3)\Z); # Level: 1 - all operators, 2 - priority 2 operators. NOTE(review): the result of this match is discarded and the escaped \| matches a literal '|' so this statement validates nothing - callers in this file pass 1 or 2
438	0					0	my ($success, $end) = map {Label} 1..2; # Exit points
	0					0
439
440	0					0	checkStackHas 3; # At least three elements on the stack
441							IfGe
442							Then
443	0			0		0	{my ($l, $d, $r) = ($w1, $w2, $w3); # Left, middle and right elements with right being the top of the stack
444	0					0	Mov $l, "[rsp+".(2*$ses)."]"; # Top 3 elements on the stack
445	0					0	Mov $d, "[rsp+".(1*$ses)."]";
446	0					0	Mov $r, "[rsp+".(0*$ses)."]";
447
448	0	0				0	if ($debug)
449	0					0	{PrintErrStringNL "Reduce 3:";
450	0					0	PrintErrRegisterInHex $l, $d, $r;
451							}
452
453	0					0	testSet("t", $l); # Parse out infix operator expression
454							IfEq
455							Then
456	0					0	{testSet("t", $r);
457							IfEq
458							Then
459	0	0				0	{testSet($priority == 1 ? "ads" : 'd', $d); # Priority 1: reduce assign, dyad and semicolon, any other priority: reduce just dyads
460							IfEq
461							Then
462	0					0	{Add rsp, 3 * $ses; # Reorder into polish notation
463	0					0	Push $_ for $d, $l, $r; # Operator first then its left and right operands
464	0					0	new(3, "Term infix term");
465	0					0	Jmp $success;
466	0					0	};
467	0					0	};
468	0					0	};
469
470	0					0	testSet("b", $l); # Parse parenthesized term
471							IfEq
472							Then
473	0					0	{testSet("B", $r);
474							IfEq
475							Then
476	0					0	{testSet("t", $d);
477							IfEq
478							Then
479	0					0	{Add rsp, $ses; # Pop the closing bracket
480	0					0	new(1, "Bracketed term"); # Wrap the term inside the brackets
481	0					0	new(2, "Brackets for term"); # Combine the opening bracket with the wrapped term
482	0	0				0	PrintErrStringNL "Reduce by ( term )" if $debug;
483	0					0	Jmp $success;
484	0					0	};
485	0					0	};
486	0					0	};
487	0					0	};
488
489	0					0	checkStackHas 2; # At least two elements on the stack
490							IfGe # Convert an empty pair of parentheses to an empty term
491							Then
492	0			0		0	{my ($l, $r) = ($w1, $w2); # Left and right elements with right being the top of the stack
493
494	0	0				0	if ($debug) # NOTE(review): this trace is emitted before the two elements are loaded below so the registers still hold whatever was in them beforehand
495	0					0	{PrintErrStringNL "Reduce 2:";
496	0					0	PrintErrRegisterInHex $l, $r;
497							}
498
499							# KeepFree $l, $r; # Why ?
500	0					0	Mov $l, "[rsp+".(1*$ses)."]"; # Top 2 elements on the stack
501	0					0	Mov $r, "[rsp+".(0*$ses)."]";
502	0					0	testSet("b", $l); # Empty pair of parentheses
503							IfEq
504							Then
505	0					0	{testSet("B", $r);
506							IfEq
507							Then
508	0					0	{Add rsp, 2 * $ses; # Pop expression
509	0					0	Push $l; # Bracket as operator
510	0					0	new(1, "Empty brackets");
511	0					0	Jmp $success;
512	0					0	};
513	0					0	};
514	0					0	testSet("s", $l); # Semi-colon, close implies remove unneeded semi
515							IfEq
516							Then
517	0					0	{testSet("B", $r);
518							IfEq
519							Then
520	0					0	{Add rsp, 2 * $ses; # Pop expression
521	0					0	Push $r; # Keep just the closing bracket
522	0	0				0	PrintErrStringNL "Reduce by ;)" if $debug;
523	0					0	Jmp $success;
524	0					0	};
525	0					0	};
526	0					0	testSet("p", $l); # Prefix, term
527							IfEq
528							Then
529	0					0	{testSet("t", $r);
530							IfEq
531							Then
532	0					0	{new(2, "Prefix term");
533	0					0	Jmp $success;
534	0					0	};
535	0					0	};
536							# KeepFree $l, $r;
537	0					0	};
538
539	0					0	ClearZF; # Failed to match anything
540	0					0	Jmp $end;
541
542	0					0	SetLabel $success; # Successfully matched
543	0					0	SetZF;
544
545	0					0	SetLabel $end; # End
546							} # reduce
547
548							sub reduceMultiple($) #P Reduce existing operators on the stack by calling reduce repeatedly until it fails to make a reduction.
549	0			0	1	0	{my ($priority) = @_; # Priority of the operators to reduce
550							K('count',99)->for(sub # An improbably high but finite number of reductions
551	0			0		0	{my ($index, $start, $next, $end) = @_; # Execute body
552	0					0	reduce($priority); # Leaves the Zero Flag set if a reduction was made
553	0					0	Jne $end; # Keep going as long as reductions are possible
554	0					0	});
555							}
556
557							sub accept_a() #P Assign: must follow a term.
558	0			0	1	0	{checkSet("t"); # The top of the stack must be a term
559	0					0	reduceMultiple 2; # Reduce with a priority other than 1 which restricts reduce to dyads in its infix case
560	0	0				0	PrintErrStringNL "accept a" if $debug;
561	0					0	pushElement; # Stack the assign
562							}
563
564							sub accept_b #P Open: an opening bracket.
565	0			0	1	0	{checkSet("abdps"); # The top of the stack must be one of the lexical items with these letters
566	0	0				0	PrintErrStringNL "accept b" if $debug;
567	0					0	pushElement; # Stack the opening bracket
568							}
569
570							sub accept_B #P Closing parenthesis.
571	0			0	1	0	{checkSet("bst"); # The top of the stack must be one of the lexical items with these letters
572	0	0				0	PrintErrStringNL "accept B" if $debug;
573	0					0	reduceMultiple 1; # Reduce everything possible before stacking the closing bracket
574	0					0	pushElement; # Stack the closing bracket
575	0					0	reduceMultiple 1; # Reduce again now that the closing bracket is present
576	0					0	checkSet("bst"); # Check the top of the stack again after the reductions
577							}
578
579							sub accept_d #P Infix but not assign or semi-colon: must follow a term.
580	0			0	1	0	{checkSet("t"); # The top of the stack must be a term
581	0	0				0	PrintErrStringNL "accept d" if $debug;
582	0					0	pushElement; # Stack the infix operator
583							}
584
585							sub accept_p #P Prefix operator.
586	0			0	1	0	{checkSet("abdps"); # The top of the stack must be one of the lexical items with these letters
587	0	0				0	PrintErrStringNL "accept p" if $debug;
588	0					0	pushElement; # Stack the prefix operator
589							}
590
591							sub accept_q #P Post fix operator: must follow a term.
592	0			0	1	0	{checkSet("t"); # The top of the stack must be a term
593	0	0				0	PrintErrStringNL "accept q" if $debug;
594							IfEq # Post fix operator applied to a term
595							Then
596	0			0		0	{Pop $w1; # Remove the term
597	0					0	pushElement; # Stack the post fix operator beneath the term
598	0					0	Push $w1; # Put the term back on top
599	0					0	new(2, "Postfix"); # Combine the operator and the term into a new term
600							}
601	0					0	}
602
603							sub accept_s #P Semi colon.
604	0			0	1	0	{checkSet("bst"); # The top of the stack must be one of the lexical items with these letters
605	0	0				0	PrintErrStringNL "accept s" if $debug;
606	0					0	Mov $w1, "[rsp]"; # Element on top of the stack
607	0					0	testSet("s", $w1); # Is it also a semicolon?
608							IfEq # Insert an empty element between two consecutive semicolons
609							Then
610	0			0		0	{pushEmpty;
611	0					0	};
612	0					0	reduceMultiple 1; # Reduce everything possible before stacking the semicolon
613	0					0	pushElement; # Stack the semicolon
614							}
615
616							sub accept_v #P Variable.
617	0			0	1	0	{checkSet("abdps"); # The top of the stack must be one of the lexical items with these letters
618	0	0				0	PrintErrStringNL "accept v" if $debug;
619	0					0	pushElement; # Stack the variable
620	0					0	new(1, "Variable"); # Convert the variable into a term
621							V(count,99)->for(sub # Reduce prefix operators
622	0			0		0	{my ($index, $start, $next, $end) = @_;
623	0					0	checkStackHas 2; # Need a prefix operator and a term
624	0					0	Jl $end; # Fewer than two elements on the stack
625	0					0	my ($l, $r) = ($w1, $w2); # Left and right elements with right being the top of the stack
626	0					0	Mov $l, "[rsp+".(1*$ses)."]";
627	0					0	Mov $r, "[rsp+".(0*$ses)."]";
628	0					0	testSet("p", $l); # Is the element beneath the term a prefix operator?
629	0					0	Jne $end; # No more prefix operators to apply
630	0					0	new(2, "Prefixed variable"); # Combine the prefix operator and the term into a new term
631	0					0	});
632							}
633
634							sub parseExpression() #P Parse the string of classified lexical items addressed by register $start of length $size. The resulting parse tree (if any) is returned in r15.
635	0			0	1	0	{my $end = Label; # Exit point - reached directly if the expression is empty
636	0					0	my $eb = $element."b"; # Contains a byte from the item being parsed
637
638	0					0	Cmp $size, 0; # Check for empty expression
639	0					0	Je $end;
640
641	0					0	loadCurrentChar; # Load current character
642							### Need test for ignorable white space as first character
643	0					0	testSet($firstSet, $element); # The first lexical item must be one of those allowed to start an expression
644							IfNe
645							Then
646	0			0		0	{error(<<END);
647							Expression must start with 'opening parenthesis', 'prefix
648							operator', 'semi-colon' or 'variable'.
649							END
650	0					0	};
651
652	0					0	testSet("v", $element); # Single variable
653							IfEq
654							Then
655	0			0		0	{pushElement;
656	0					0	new(1, "accept initial variable"); # Convert the variable into a term
657							},
658							Else
659	0			0		0	{testSet("s", $element); # Semi
660							IfEq
661							Then
662	0					0	{pushEmpty; # An initial semicolon is preceded by an empty element converted into a term
663	0					0	new(1, "accept initial semicolon");
664	0					0	};
665	0					0	pushElement; # Stack the first lexical item
666	0					0	};
667
668	0					0	Inc $index; # We have processed the first character above
669	0					0	Mov $prevChar, $element; # Initialize the previous lexical item
670
671							For # Parse each utf32 character after it has been classified
672	0			0		0	{my ($start, $end, $next) = @_; # Start and end of the classification loop
673	0					0	loadCurrentChar; # Load current character
674
675	0	0				0	PrintErrRegisterInHex $element if $debug;
676
677	0					0	Cmp $eb, $WhiteSpace;
678	0					0	Je $next; # Ignore white space
679
680	0					0	Cmp $eb, 1; # Brackets are singular but everything else can potentially be a plurality
681							IfGt
682							Then
683	0					0	{Cmp $prevChar."b", $eb; # Compare with previous element known not to be white space or a bracket
684	0					0	Je $next # Still inside the same lexical item
685	0					0	};
686	0					0	Mov $prevChar, $element; # Save element to previous element now we know we are on a different element
687
688	0					0	for my $l(sort keys $Lex->{lexicals}->%*) # Each possible lexical item after classification
689	0					0	{my $x = $Lex->{lexicals}{$l}{letter};
690	0	0				0	next unless $x; # Skip characters that do not have a letter defined for Tree::Term because the lexical items needed to layout a file of lexical items are folded down to the actual lexical items required to represent the language independent of the textual layout with white space.
691
692	0					0	my $n = $Lex->{lexicals}{$l}{number};
693	0					0	Comment "Compare to $n for $l";
694	0					0	Cmp $eb, $n;
695
696							IfEq
697							Then
698	0					0	{eval "accept_$x"; # Call the accept sub named after the letter. NOTE(review): neither the result of the eval nor $@ is inspected here so a failure in the accept sub would pass unnoticed
699	0					0	Jmp $next
700	0					0	};
701							}
702	0					0	error("Unexpected lexical item"); # Not selected
703	0					0	} $index, $size; # Presumably iterates the index register up to the size of the input - confirm against the definition of For
704
705	0					0	testSet($lastSet, $prevChar); # Last lexical element
706							IfNe # Incomplete expression
707							Then
708	0			0		0	{error("Incomplete expression");
709	0					0	};
710
711							K('count', 99)->for(sub # Remove trailing semicolons if present
712	0			0		0	{my ($index, $start, $next, $end) = @_; # Execute body
713	0					0	checkStackHas 2;
714	0					0	Jl $end; # Does not have two or more elements
715	0					0	Pop $w1;
716	0					0	testSet("s", $w1); # Check that the top most element is a semi colon
717							IfNe # Not a semi colon so put it back and finish the loop
718							Then
719	0					0	{Push $w1;
720	0					0	Jmp $end;
721	0					0	};
722	0					0	});
723
724	0					0	reduceMultiple 1; # Final reductions
725
726	0					0	checkStackHas 1; # Exactly one element should remain
727							IfNe # The stack did not reduce to a single element
728							Then
729	0			0		0	{error("Multiple expressions on stack");
730	0					0	};
731
732	0					0	Pop r15; # The resulting parse tree
733	0					0	Shr r15, 32; # The offset of the resulting parse tree
734	0					0	SetLabel $end;
735							} # parseExpression
736
737							sub MatchBrackets(@) #P Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
738	0			0	1	0	{my (@parameters) = @_; # Parameters
739	0	0				0	@_ >= 1 or confess "One or more parameters";
740
741							my $s = Subroutine
742	0			0		0	{my ($p) = @_; # Parameters
743	0					0	Comment "Match brackets in utf32 text";
744
745	0					0	my $finish = Label;
746	0					0	PushR xmm0, k7, r10, r11, r12, r13, r14, r15, rsi; # r15 index of the current character. r14 is the current classification. r13 the classification code or offset of the opener being matched. r12 the stack depth. r11 the number of opening brackets found. r10 address of first utf32 character.
747
748	0					0	Mov rsi, rsp; # Save stack location so we can use the stack to record the brackets we have found
749	0					0	ClearRegisters r11, r12, r14, r15; # Count the number of brackets and track the stack depth, index of each character. r14 is cleared as well because only its low byte is loaded below yet the whole register is compared
750	0					0	K(three, 3)->setMaskFirst(k7); # These are the number of bytes that we are going to use for the offsets of brackets which limits the size of a program to 2**24 (about 16 million) utf32 characters
751	0					0	$$p{fail} ->getReg(r11); # Clear failure indicator
752	0					0	$$p{opens} ->getReg(r11); # Clear count of opens
753	0					0	$$p{address}->setReg(r10); # Address of first utf32 character
754	0					0	my $w = RegisterSize eax; # Size of a utf32 character
755
756							$$p{size}->for(sub # Process each utf32 character in the block of memory
757	0					0	{my ($index, $start, $next, $end) = @_;
758	0					0	my $continue = Label;
759
760	0					0	Mov r14b, "[r10+$w*r15+3]"; # Classification character held in the high byte of the utf32 character
761
762	0					0	Cmp r14, 0x10; # First bracket
763	0					0	Jl $continue; # Less than first bracket
764	0					0	Cmp r14, 0x4f; # Last bracket
765	0					0	Jg $continue; # Greater than last bracket
766
767	0					0	Test r14, 1; # Zero means that the bracket is an opener
768							IfZ sub # Save an opener then continue
769	0					0	{Push r15; # Save position in input
770	0					0	Push r14; # Save opening code
771	0					0	Inc r11; # Count number of opening brackets
772	0					0	Inc r12; # Number of brackets currently open
773	0					0	Jmp $continue;
774	0					0	};
775	0					0	Cmp r12, 1; # Check that there is a bracket to match on the stack
776							IfLt sub # Nothing on stack
777	0					0	{Not r15; # Complement the offset at which the error occurred so that a failure at offset zero is not reported as zero
778	0					0	$$p{fail}->getReg(r15); # Position in input that caused the failure
779	0					0	Jmp $finish; # Return
780	0					0	};
781	0					0	Mov r13, "[rsp]"; # Peek at the opening bracket code which is on top of the stack
782	0					0	Inc r13; # Expected closing bracket
783	0					0	Cmp r13, r14; # Check for match
784							IfNe sub # Mismatch
785	0					0	{Not r15; # Complement the offset at which the error occurred so that a failure at offset zero is not reported as zero
786	0					0	$$p{fail}->getReg(r15); # Position in input that caused the failure
787	0					0	Jmp $finish; # Return
788	0					0	};
789	0					0	Pop r13; # The closing bracket matches the opening bracket so discard the saved opening code
790	0					0	Pop r13; # Offset of opener
791	0					0	Dec r12; # Close off bracket sequence
792	0					0	Vpbroadcastq xmm0, r15; # Load offset of closer
793	0					0	Vmovdqu8 "[r10+$w*r13]\{k7}", xmm0; # Save offset of closer in the code for the opener - the classification is left intact so we still know what kind of bracket we have
794	0					0	Vpbroadcastq xmm0, r13; # Load offset of opener
795	0					0	Vmovdqu8 "[r10+$w*r15]\{k7}", xmm0; # Save offset of opener in the code for the closer - the classification is left intact so we still know what kind of bracket we have
796	0					0	SetLabel $continue; # Continue with next character
797	0					0	Inc r15; # Next character
798	0					0	});
799
800	0					0	SetLabel $finish;
801	0					0	Mov rsp, rsi; # Restore stack
802	0					0	$$p{opens}->getReg(r11); # Number of brackets opened
803	0					0	PopR;
804	0					0	} [qw(address size fail opens)], name => q(Unisyn::Parse::MatchBrackets);
805
806	0					0	$s->call(@parameters);
807							} # MatchBrackets
808
809							sub ClassifyNewLines(@) #P Scan input string looking for opportunities to convert new lines into semi colons.
810	0			0	1	0	{my (@parameters) = @_; # Parameters
811	0	0				0	@_ >= 1 or confess "One or more parameters";
812
813							my $s = Subroutine
814	0			0		0	{my ($p) = @_; # Parameters
815	0					0	my $current = r15; # Index of the current character
816	0					0	my $middle = r14; # Index of the middle character
817	0					0	my $first = r13; # Index of the first character
818	0					0	my $address = r12; # Address of input string
819	0					0	my $size = r11; # Length of input utf32 string
820	0					0	my($c1, $c2) = (r8."b", r9."b"); # Lexical codes being tested
821
822	0					0	PushR r8, r9, r10, r11, r12, r13, r14, r15;
823
824	0					0	$$p{address}->setReg($address); # Address of string
825	0					0	$$p{size} ->setReg($size); # Size of string
826	0					0	Mov $current, 2; Mov $middle, 1; Mov $first, 0;
	0					0
	0					0
827
828							For # Each character in input string
829	0					0	{my ($start, $end, $next) = @_; # Start, end and next labels
830
831
832	0					0	getLexicalCode $c1, $address, $middle; # Lexical code of the middle character
833	0					0	Cmp $c1, $WhiteSpace;
834							IfEq
835							Then
836	0					0	{getAlpha $c1, $address, $middle;
837
838	0					0	Cmp $c1, $asciiNewLine;
839							IfEq # Middle character is an insignificant new line and thus could be a semicolon
840							Then
841	0					0	{getLexicalCode $c1, $address, $first;
842
843							my sub makeSemiColon # Reclassify the middle new line as a new line semicolon
844	0					0	{putLexicalCode $c2, $address, $middle, $NewLineSemiColon;
845							}
846
847							my sub check_bpv # Make the middle new line a semicolon if the current character is classified as 'b', 'p' or 'v'
848	0					0	{getLexicalCode $c1, $address, $current;
849	0					0	Cmp $c1, $OpenBracket;
850
851							IfEq
852							Then
853	0					0	{makeSemiColon;
854							},
855							Else
856	0					0	{Cmp $c1, $prefix;
857							IfEq
858							Then
859	0					0	{makeSemiColon;
860							},
861							Else
862	0					0	{Cmp $c1, $variable;
863							IfEq
864							Then
865	0					0	{makeSemiColon;
866	0					0	};
867	0					0	};
868	0					0	};
869							}
870
871	0					0	Cmp $c1, $CloseBracket; # Check first character of sequence: only a close bracket, suffix or variable before the new line leads to the check of what follows
872							IfEq
873							Then
874	0					0	{check_bpv;
875							},
876							Else
877	0					0	{Cmp $c1, $suffix;
878							IfEq
879							Then
880	0					0	{check_bpv;
881							},
882							Else
883	0					0	{Cmp $c1, $variable;
884							IfEq
885							Then
886	0					0	{check_bpv;
887	0					0	};
888	0					0	};
889	0					0	};
890	0					0	};
891	0					0	};
892
893	0					0	Mov $first, $middle; Mov $middle, $current; # Find next lexical item
	0					0
894	0					0	getLexicalCode $c1, $address, $current; # Current lexical code
895	0					0	Mov $middle, $current; # NOTE(review): repeats the move of $current into $middle made two statements earlier
896	0					0	Inc $current; # Next possible character
897							For
898	0					0	{my ($start, $end, $next) = @_;
899	0					0	getLexicalCode $c2, $address, $current; # Lexical code of next character
900	0					0	Cmp $c1, $c2;
901	0					0	Jne $end; # Terminate when we are in a different lexical item
902	0					0	} $current, $size;
903	0					0	} $current, $size;
904
905	0					0	PopR;
906	0					0	} [qw(address size)], name => q(Unisyn::Parse::ClassifyNewLines);
907
908	0					0	$s->call(@parameters);
909							} # ClassifyNewLines
910
911							sub ClassifyWhiteSpace(@) #P Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
912	0			0	1	0	{my (@parameters) = @_; # Parameters
913	0	0				0	@_ >= 1 or confess "One or more parameters";
914
915							my $s = Subroutine
916	0					0	{my ($p) = @_; # Parameters
917	0					0	my $eb = r15."b"; # Lexical type of current char
918	0					0	my $s = r14; # State of white space between 'a'
919	0					0	my $S = r13; # State of white space before 'a'
920	0					0	my $cb = r12."b"; # Actual character within alphabet
921	0					0	my $address = r11; # Address of input string
922	0					0	my $index = r10; # Index of current char
923	0					0	my ($w1, $w2) = (r8."b", r9."b"); # Temporary work registers
924
925							my sub getAlpha($;$) # Load the position of a lexical item in its alphabet from the current character
926	0					0	{my ($register, $indexReg) = @_; # Register to load, optional index register
927	0		0			0	getAlpha $register, $address, $indexReg // $index # Supplied index or default - the supplied index register must be tested first because $index is always defined
928							};
929
930							my sub getLexicalCode() # Load the lexical code of the current character in memory into the current character
931	0					0	{getLexicalCode $eb, $address, $index; # Always uses the current index
932							};
933
934							my sub putLexicalCode($;$) # Put the specified lexical code into the current character in memory.
935	0					0	{my ($code, $indexReg) = @_; # Code, optional index register
936	0		0			0	putLexicalCode $w1, $address, ($indexReg//$index), $code;
937							};
938
939	0					0	PushR r8, r9, r10, r11, r12, r13, r14, r15;
940
941	0					0	$$p{address}->setReg($address); # Address of string
942	0					0	Mov $s, -1; Mov $S, -1; Mov $index, 0; # Initial states, position
	0					0
	0					0
943
944							$$p{size}->for(sub # Each character in expression
945	0					0	{my ($indexVariable, $start, $next, $end) = @_;
946
947	0					0	$indexVariable->setReg($index);
948	0					0	getLexicalCode; # Current lexical code
949
950							AndBlock # Trap space before new line and detect new line after ascii
951	0					0	{my ($end, $start) = @_;
952	0					0	Cmp $index, 0; Je $end; # Start beyond the first character so we can look back one character.
	0					0
953	0					0	Cmp $eb, $Ascii; Jne $end; # Current is ascii
	0					0
954
955	0					0	Mov $w1, "[$address+$indexScale*$index-$indexScale+$lexCodeOffset]"; # Previous lexical code
956	0					0	Cmp $w1, $Ascii; Jne $end; # Previous is ascii
	0					0
957
958	0					0	if (1) # Check for 's' followed by 'n' and 'a' followed by 'n'
959	0					0	{Mov $w1, "[$address+$indexScale*$index-$indexScale]"; # Previous character
960	0					0	getAlpha $w2; # Current character
961
962	0					0	Cmp $w1, $asciiSpace; # Check for space followed by new line
963							IfEq
964							Then
965	0					0	{Cmp $w2, $asciiNewLine;
966							IfEq # Disallow 's' followed by 'n'
967							Then
968	0					0	{PrintErrStringNL "Space detected before new line at index:";
969	0					0	PrintErrRegisterInHex $index;
970	0					0	PrintErrTraceBack;
971	0					0	Exit(1);
972	0					0	};
973	0					0	};
974
975	0					0	Cmp $w1, $asciiSpace; Je $end; # Check for 'a' followed by 'n'
	0					0
976	0					0	Cmp $w1, $asciiNewLine; Je $end; # Current is 'a' but not 'n' or 's'
	0					0
977	0					0	Cmp $w2, $asciiNewLine; Jne $end; # Current is 'n'
	0					0
978
979	0					0	putLexicalCode $WhiteSpace; # Mark new line as significant
980							}
981	0					0	};
982
983							AndBlock # Spaces and new lines between other ascii
984	0					0	{my ($end, $start) = @_;
985	0					0	Cmp $s, -1;
986							IfEq # Looking for opening ascii
987							Then
988	0					0	{Cmp $eb, $Ascii; Jne $end; # Not ascii
	0					0
989	0					0	getAlpha $cb; # Current character
990	0					0	Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
	0					0
991	0					0	Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
	0					0
992							IfEq # NOTE(review): the preceding Je $end has already left the block on equality so this body looks unreachable and $s would never leave -1 - confirm the intended condition
993							Then
994	0					0	{Mov $s, $index; Inc $s; # Ascii not space nor new line
	0					0
995	0					0	};
996	0					0	Jmp $end;
997							},
998							Else # Looking for closing ascii
999	0					0	{Cmp $eb, $Ascii;
1000							IfNe # Not ascii
1001							Then
1002	0					0	{Mov $s, -1;
1003	0					0	Jmp $end
1004	0					0	};
1005	0					0	getAlpha $cb; # Current character
1006	0					0	Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
	0					0
1007	0					0	Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
	0					0
1008
1009							For # Move over spaces and new lines between two ascii characters that are neither of new line or space
1010	0					0	{my ($start, $end, $next) = @_;
1011	0					0	getAlpha $cb, $s; # Check for 's' or 'n' at the position held in $s
1012	0					0	Cmp $cb, $asciiSpace;
1013							IfEq
1014							Then
1015	0					0	{putLexicalCode $WhiteSpace, $s; # Mark as significant white space.
1016	0					0	Jmp $next;
1017	0					0	};
1018	0					0	Cmp $cb, $asciiNewLine;
1019							IfEq
1020							Then
1021	0					0	{putLexicalCode $WhiteSpace; # Mark as significant new line. NOTE(review): no index register is supplied so this writes at $index rather than at $s - confirm
1022	0					0	Jmp $next;
1023	0					0	};
1024	0					0	} $s, $index;
1025
1026	0					0	Mov $s, $index; Inc $s;
	0					0
1027	0					0	};
1028	0					0	};
1029
1030							AndBlock # Note: 's' preceding 'a' are significant
1031	0					0	{my ($end, $start) = @_;
1032	0					0	Cmp $S, -1;
1033							IfEq # Looking for 's'
1034							Then
1035	0					0	{Cmp $eb, $Ascii; # Not 'a'
1036							IfNe
1037							Then
1038	0					0	{Mov $S, -1;
1039	0					0	Jmp $end
1040	0					0	};
1041	0					0	getAlpha $cb; # Actual character in alphabet
1042	0					0	Cmp $cb, $asciiSpace; # Space
1043							IfEq
1044							Then
1045	0					0	{Mov $S, $index; # Remember where the space is
1046	0					0	Jmp $end;
1047	0					0	};
1048							},
1049							Else # Looking for 'a'
1050	0					0	{Cmp $eb, $Ascii; # Not 'a'
1051							IfNe
1052							Then
1053	0					0	{Mov $S, -1;
1054	0					0	Jmp $end
1055	0					0	};
1056	0					0	getAlpha $cb; # Actual character in alphabet
1057	0					0	Cmp $cb, $asciiSpace; Je $end; # Skip 's'
	0					0
1058
1059	0					0	Cmp $cb, $asciiNewLine;
1060							IfEq # New lines prevent 's' from preceding 'a'
1061							Then
1062	0					0	{Mov $s, -1; # NOTE(review): this block tracks $S so resetting $s here looks unintended - confirm
1063	0					0	Jmp $end
1064	0					0	};
1065
1066							For # Move over spaces to non space ascii
1067	0					0	{my ($start, $end, $next) = @_;
1068	0					0	putLexicalCode $WhiteSpace, $S; # Mark each character from the remembered space up to the current character as significant
1069	0					0	} $S, $index;
1070	0					0	Mov $S, -1; # Look for next possible space
1071							}
1072	0					0	};
	0					0
1073	0					0	});
1074
1075							$$p{size}->for(sub # Invert white space so that significant white space becomes ascii and the remainder is ignored
1076	0					0	{my ($indexVariable, $start, $next, $end) = @_;
1077
1078	0					0	$indexVariable->setReg($index);
1079	0					0	getLexicalCode; # Current lexical code
1080
1081							AndBlock # Invert non significant white space
1082	0					0	{my ($end, $start) = @_;
1083	0					0	Cmp $eb, $Ascii;
1084	0					0	Jne $end; # Not ascii
1085
1086	0					0	getAlpha $cb; # Actual character in alphabet
1087	0					0	Cmp $cb, $asciiSpace;
1088							IfEq
1089							Then
1090	0					0	{putLexicalCode $WhiteSpace; # Mark space as not significant
1091	0					0	Jmp $next;
1092	0					0	};
1093	0					0	Cmp $cb, $asciiNewLine;
1094							IfEq
1095							Then
1096	0					0	{putLexicalCode $WhiteSpace; # Mark new line as not significant
1097	0					0	Jmp $next;
1098	0					0	};
1099	0					0	};
1100
1101							AndBlock # Mark significant white space
1102	0					0	{my ($end, $start) = @_;
1103	0					0	Cmp $eb, $WhiteSpace; Jne $end; # Not significant white space
	0					0
1104	0					0	putLexicalCode $Ascii; # Mark as ascii
1105	0					0	};
1106	0					0	});
1107
1108	0					0	PopR;
1109	0					0	} [qw(address size)], name => q(Unisyn::Parse::ClassifyWhiteSpace);
1110
1111	0					0	$s->call(@parameters);
1112							} # ClassifyWhiteSpace
1113
1114							sub reload($$) #P Reload the variables associated with a parse.
1115	0			0	1	0	{my ($parse, $parameters) = @_; # Parse, hash of variable parameters
1116	0	0				0	@_ >= 2 or confess "Two or more parameters"; # Both the parse and the parameter hash are dereferenced below
1117
1118							$parse->quarks->reload (arena => $$parameters{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
1119							array => $$parameters{numbersToStringsFirst},
1120	0					0	tree => $$parameters{stringsToNumbersFirst});
1121
1122							$parse->operators->reload(arena => $$parameters{bs}, # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
1123							array => $$parameters{opNumbersToStringsFirst},
1124	0	0				0	tree => $$parameters{opStringsToNumbersFirst}) if $parse->operators; # Operators are optional
1125							}
1126
1127							sub parseUtf8($@) #P Parse a unisyn expression encoded as utf8 and return the parse tree.
1128	0			0	1	0	{my ($parse, @parameters) = @_; # Parse, parameters
1129	0	0				0	@_ >= 1 or confess "One or more parameters";
1130
1131							my $s = Subroutine
1132	0					0	{my ($p, $s) = @_; # Parameters, sub definition
1133	0					0	$ParseUtf8SubDef = $s; # Save the sub definition globally so that we can forward its parameter list to L.
1134
1135	0					0	$parse->reload($p); # Reload the parse description
1136	0	0				0	PrintErrStringNL "ParseUtf8" if $debug;
1137
1138	0					0	PushR $parseStackBase, map {"r$_"} 8..15;
	0					0
1139	0					0	PushZmm 0..1; PushMask 0..2; # Used to hold arena and classifiers. Zmm0 is used as a short string to quark the lexical item strings.
	0					0
1140
1141	0					0	my $source32 = $$p{source32};
1142	0					0	my $sourceSize32 = $$p{sourceSize32};
1143	0					0	my $sourceLength32 = $$p{sourceLength32};
1144
1145							ConvertUtf8ToUtf32 u8 => $$p{address}, size8 => $$p{size}, # Convert to utf32
1146	0					0	u32 => $source32, size32 => $sourceSize32,
1147							count => $sourceLength32;
1148
1149							my sub PrintUtf32($$) # Print a utf32 string in hexadecimal
1150	0					0	{my ($size, $address) = @_; # Variable size, variable address
1151	0					0	$address->printErrMemoryInHexNL($size);
1152							}
1153
1154	0	0				0	if ($debug)
1155	0					0	{PrintErrStringNL "After conversion from utf8 to utf32";
1156	0					0	$sourceSize32 ->errNL("Output Length: "); # Write output length
1157	0					0	PrintUtf32($sourceSize32, $source32); # Print utf32
1158							}
1159
1160	0					0	Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{lexicalLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1161	0					0	Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{lexicalHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1162
1163	0					0	ClassifyWithInRangeAndSaveOffset address=>$source32, size=>$sourceLength32; # Alphabetic classification
1164	0	0				0	if ($debug)
1165	0					0	{PrintErrStringNL "After classification into alphabet ranges";
1166	0					0	PrintUtf32($sourceSize32, $source32); # Print classified utf32
1167							}
1168
1169	0					0	Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{bracketsLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1170	0					0	Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{bracketsHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1171
1172	0					0	ClassifyWithInRange address=>$source32, size=>$sourceLength32; # Bracket classification
1173	0	0				0	if ($debug)
1174	0					0	{PrintErrStringNL "After classification into brackets";
1175	0					0	PrintUtf32($sourceSize32, $source32); # Print classified brackets
1176							}
1177
1178	0					0	my $opens = V(opens, -1); # Receives the number of opening brackets counted by MatchBrackets
1179	0					0	MatchBrackets address=>$source32, size=>$sourceLength32, $opens, $$p{fail}; # Match brackets
1180	0	0				0	if ($debug)
1181	0					0	{PrintErrStringNL "After bracket matching";
1182	0					0	PrintUtf32($sourceSize32, $source32); # Print matched brackets
1183							}
1184
1185	0					0	ClassifyWhiteSpace address=>$source32, size=>$sourceLength32; # Classify white space
1186	0	0				0	if ($debug)
1187	0					0	{PrintErrStringNL "After white space classification";
1188	0					0	PrintUtf32($sourceSize32, $source32);
1189							}
1190
1191	0					0	ClassifyNewLines address=>$source32, size=>$sourceLength32; # Classify new lines
1192	0	0				0	if ($debug)
1193	0					0	{PrintErrStringNL "After classifying new lines";
1194	0					0	PrintUtf32($sourceSize32, $source32);
1195							}
1196
1197	0					0	$$p{source32} ->setReg($start); # Start of expression string after it has been classified
1198	0					0	$$p{sourceLength32}->setReg($size); # Number of characters in the expression
1199	0					0	Mov $parseStackBase, rsp; # Set base of parse stack
1200
1201	0					0	parseExpression; # Parse the expression
1202
1203	0					0	$$p{parse}->getReg(r15); # Save the offset of the resulting parse tree which parseExpression leaves in r15
1204	0					0	Mov rsp, $parseStackBase; # Remove parse stack
1205
1206	0	0				0	$$p{parse}->errNL if $debug;
1207
1208	0					0	PopMask; PopZmm; PopR;
	0					0
	0					0
1209
1210							}
1211	0					0	[qw(bs address size parse fail source32 sourceSize32 sourceLength32),
1212							qw(numbersToStringsFirst stringsToNumbersFirst),
1213							qw(opNumbersToStringsFirst opStringsToNumbersFirst)],
1214							name => q(Unisyn::Parse::parseUtf8);
1215
1216	0					0	my $op = $parse->operators; # The operator methods if supplied
1217	0					0	my $zero = K(zero, 0); # Stands in for the operator quarks when no operators were supplied
1218
1219	0	0				0	$s->call # Parameterize the parse
		0
1220							(bs => $parse->arena->bs,
1221							address => $parse->address8,
1222							fail => $parse->fails,
1223							parse => $parse->parse,
1224							size => $parse->size8,
1225							source32 => $parse->source32,
1226							sourceLength32 => $parse->sourceLength32,
1227							sourceSize32 => $parse->sourceSize32,
1228							numbersToStringsFirst => $parse->quarks->numbersToStrings->first,
1229							stringsToNumbersFirst => $parse->quarks->stringsToNumbers->first,
1230							opNumbersToStringsFirst => $op ? $op->numbersToStrings->first : $zero,
1231							opStringsToNumbersFirst => $op ? $op->stringsToNumbers->first : $zero,
1232							);
1233							} # parseUtf8
1234
1235							#D1 Traverse # Traverse the parse tree
1236
1237							sub traverseParseTree($) # Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
1238	0			0	1	0	{my ($parse) = @_; # Parse tree
1239
1240							my $s = Subroutine # Traverse a tree
1241	0			0		0	{my ($p, $s) = @_; # Parameters, sub definition
1242	0					0	my $t = Nasm::X86::DescribeTree (arena=>$$p{bs}, first=>$$p{first});
1243	0					0	$t->find(K(key, $opType)); # The lexical type of the element - normally a term
1244
1245							If $t->found == 0, # Not found lexical type of element
1246							Then
1247	0					0	{PrintOutString "No type for node";
1248	0					0	Exit(1);
1249	0					0	};
1250
1251							If $t->data != $term, # Expected a term
1252							Then
1253	0					0	{PrintOutString "Expected a term";
1254	0					0	Exit(1);
1255	0					0	};
1256
1257	0					0	my $operands = V(operands); # Number of operands
1258	0					0	$t->find(K(key, 1)); # Key 1 tells us the number of operands
1259							If $t->found > 0, # Found key 1
1260							Then
1261	0					0	{$operands->copy($t->data); # Number of operands
1262							},
1263							Else
1264	0					0	{PrintOutString "Expected at least one operand";
1265	0					0	Exit(1);
1266	0					0	};
1267
1268							$operands->for(sub # Each operand is visited before the subroutine for this term is called which gives the post order
1269	0					0	{my ($index, $start, $next, $end) = @_; # Execute body
1270	0					0	my $i = (1 + $index) * $lexItemWidth; # Operand detail
1271	0					0	$t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
	0					0
1272	0					0	$t->find($i+$lexItemOffset); my $off = V(key) ->copy($t->data); # Offset of first block of sub tree
	0					0
1273
1274							If $lex == $term, # Term
1275							Then
1276	0					0	{$s->call($$p{bs}, first => $off); # Traverse sub tree referenced by offset field
1277	0					0	$t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1278							},
1279	0					0	});
	0					0
1280
1281	0					0	$t->find(K(key, $opSub)); # The subroutine for the term
1282							If $t->found > 0, # Found subroutine for term
1283							Then # Call subroutine for this term
1284							{#PushR r15, zmm0;
1285							my $p = Subroutine # Prototype subroutine to establish parameter list - this $p hides the parameters $p for the rest of this block
1286	0					0	{} [qw(tree call)], with => $s,
1287							name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::prototype";
1288
1289							my $d = Subroutine # Dispatcher
1290	0					0	{my ($q, $sub) = @_;
1291	0					0	$p->dispatchV($$q{call}, r15);
1292	0					0	} [], with => $p,
1293							name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::dispatch";
1294
1295							If $t->data > 0, # Only dispatch when the stored subroutine value is greater than zero
1296							Then
1297	0					0	{$d->call(tree => $t->first, call => $t->data) # Call sub associated with the lexical item
1298	0					0	};
1299							# my $p = Subroutine # Subroutine
1300							# {my ($parameters) = @_; # Parameters
1301							# $$parameters{call}->setReg(r15);
1302							# Call r15;
1303							# } [qw(tree call)], with => $s,
1304							# name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem";
1305							#
1306							# my $l = RegisterSize rax;
1307							# $$p{bs} ->putQIntoZmm(0, 0*$l, r15);
1308							# $$p{first}->putQIntoZmm(0, 1*$l, r15);
1309							# $t->data ->setReg(r15);
1310							# Call r15;
1311							# #PopR;
1312	0					0	};
1313
1314	0					0	} [qw(bs first)], name => "Nasm::X86::Tree::traverseParseTree";
1315
1316	0					0	PushR r15, zmm0;
1317	0					0	$s->call($parse->arena->bs, first => $parse->parse); # Start the traversal at the root of the parse tree
1318	0					0	PopR;
1319
1320	0					0	$a # NOTE(review): this returns the package variable $a which is not assigned anywhere in this sub - confirm what the caller is meant to receive
1321							} # traverseParseTree
1322
1323							#D1 Print # Print a parse tree
1324
1325							sub printLexicalItem($$$$) #P Print the utf8 string corresponding to a lexical item at a variable offset.
1326	0			0	1	0	{my ($parse, $source32, $offset, $size) = @_; # Parse tree, address of utf32 source representation, offset to lexical item in utf32, size in utf32 chars of item
1327	0					0	my $t = $parse->arena->DescribeTree; # NOTE(review): not referenced again in this sub
1328
1329							my $s = Subroutine
1330	0			0		0	{my ($p, $s) = @_; # Parameters, sub definition
1331	0					0	PushR r12, r13, r14, r15;
1332
1333	0					0	$$p{source32}->setReg(r14);
1334	0					0	$$p{offset} ->setReg(r15);
1335	0					0	Lea r13, "[r14+4*r15]"; # Address lexical item
1336	0					0	Mov eax, "[r13]"; # First lexical item clearing rax
1337	0					0	Shr rax, 24; # First lexical item type in lowest byte and all else cleared
1338
1339	0					0	my $success = Label;
1340	0					0	my $print = Label;
1341
1342	0					0	Cmp rax, $bracketsBase; # Test for brackets
1343							IfGe
1344							Then
1345	0					0	{my $o = $Lex->{bracketsOpen}; # Opening brackets
1346	0					0	my $c = $Lex->{bracketsClose}; # Closing brackets
1347	0					0	my $O = Rutf8 map {($_, chr(0))} @$o; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
	0					0
1348	0					0	my $C = Rutf8 map {($_, chr(0))} @$c; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
	0					0
1349	0					0	Mov r14, $O; # Address open bracket
1350	0					0	Mov r15, rax; # The bracket number
1351	0					0	Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Index to bracket: four bytes per table entry
1352	0					0	PrintOutUtf8Char; # Print opening bracket
1353	0					0	Mov r14, $C; # Address close bracket
1354	0					0	Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Closing brackets occupy 3 bytes padded to 4 so the same index applies
1355	0					0	PrintOutUtf8Char; # Print closing bracket
1356	0					0	Jmp $success;
1357	0					0	};
1358
1359	0					0	Mov r12, -1; # Alphabet to use
1360	0					0	Cmp rax, $variable; # Test for variable
1361							IfEq
1362							Then
1363	0					0	{my $b = $Lex->{alphabetsOrdered}{variable}; # Load variable alphabet in dwords
1364	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1365	0					0	my $a = Rd @b;
1366	0					0	Mov r12, $a;
1367	0					0	Jmp $print;
1368	0					0	};
1369
1370	0					0	Cmp rax, $assign; # Assign operator
1371							IfEq
1372							Then
1373	0					0	{my $b = $Lex->{alphabetsOrdered}{assign};
1374	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1375	0					0	my $a = Rd @b;
1376	0					0	Mov r12, $a;
1377	0					0	Jmp $print;
1378	0					0	};
1379
1380	0					0	Cmp rax, $dyad; # Dyad
1381							IfEq
1382							Then
1383	0					0	{my $b = $Lex->{alphabetsOrdered}{dyad};
1384	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1385	0					0	my $a = Rd @b;
1386	0					0	Mov r12, $a;
1387	0					0	Jmp $print;
1388	0					0	};
1389
1390	0					0	Cmp rax, $Ascii; # Ascii
1391							IfEq
1392							Then
1393	0					0	{my $b = $Lex->{alphabetsOrdered}{Ascii};
1394	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1395	0					0	my $a = Rd @b;
1396	0					0	Mov r12, $a;
1397	0					0	Jmp $print;
1398	0					0	};
1399
1400	0					0	Cmp rax, $prefix; # Prefix
1401							IfEq
1402							Then
1403	0					0	{my $b = $Lex->{alphabetsOrdered}{prefix};
1404	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1405	0					0	my $a = Rd @b;
1406	0					0	Mov r12, $a;
1407	0					0	Jmp $print;
1408	0					0	};
1409
1410	0					0	Cmp rax, $suffix; # Suffix
1411							IfEq
1412							Then
1413	0					0	{my $b = $Lex->{alphabetsOrdered}{suffix};
1414	0					0	my @b = map {convertUtf32ToUtf8LE $_} @$b;
	0					0
1415	0					0	my $a = Rd @b;
1416	0					0	Mov r12, $a;
1417	0					0	Jmp $print;
1418	0					0	};
1419
1420	0					0	PrintErrTraceBack; # Unknown lexical type
1421	0					0	PrintErrStringNL "Alphabet not found for unexpected lexical item";
1422	0					0	PrintErrRegisterInHex rax;
1423	0					0	Exit(1);
1424
1425	0					0	SetLabel $print; # Decoded
1426
1427							$$p{size}->for(sub # Write out each letter of the lexical item
1428	0					0	{my ($index, $start, $next, $end) = @_; # Execute body
1429	0					0	$index->setReg(r14); # Index of the letter within the lexical item
1430	0					0	ClearRegisters r15; # Next instruction does not clear the entire register
1431	0					0	Mov r15b, "[r13+4*r14]"; # Load alphabet offset from the low byte of the utf32 character
1432	0					0	Shl r15, 2; # Each letter is 4 bytes wide in utf8
1433	0					0	Lea rax, "[r12+r15]"; # Address alphabet letter as utf8
1434	0					0	PrintOutUtf8Char; # Print utf8 character
1435	0					0	});
1436
1437	0					0	SetLabel $success; # Done
1438
1439	0					0	PopR;
1440	0					0	} [qw(offset source32 size)],
1441							name => q(Unisyn::Parse::printLexicalItem);
1442
1443	0					0	$s->call(offset => $offset, source32 => $source32, size => $size);
1444							}
1445
1446							sub print($) # Print a parse tree as an indented outline: sub terms are recursed into, every other lexical item is printed as a type label followed by its source text.
1447	0			0	1	0	{my ($parse) = @_; # Parse whose arena, parse (tree root) and source32 are read below
1448	0					0	my $t = $parse->arena->DescribeTree; # Tree descriptor in the parse's arena - re-pointed at each node as it is visited
1449
1450	0					0	PushR my ($depthR) = (r12); # r12 holds the recursion depth - paired with the PopR at the end
1451
1452							my $b = Subroutine # Print one blank per level of depth to indent sub trees
1453							{V(loop, $depthR)->for(sub # Loop depth times
1454	0					0	{PrintOutString " ";
1455	0			0		0	});
1456	0					0	} [], name => "Nasm::X86::Tree::dump::spaces"; # Takes no parameters
1457
1458							my $s = Subroutine # Print the term located by the first parameter and, recursively, its sub terms
1459	0			0		0	{my ($p, $s) = @_; # Parameters, sub definition (used below to make the recursive call)
1460
1461	0					0	my $B = $$p{bs}; # The bs parameter is passed on unchanged to recursive calls
1462
1463	0					0	$t->address->copy($$p{bs}); # Load the tree descriptor's address from the bs parameter
1464	0					0	$t->first ->copy($$p{first}); # Position the tree descriptor on the node to print
1465	0					0	$t->find(K(key, 0)); # Key 0 tells us the type of the element - normally a term
1466
1467							If $t->found == 0, # Not found key 0
1468							Then
1469	0					0	{PrintOutString "No type for node";
1470	0					0	Exit(1); # Stop with exit code 1
1471	0					0	};
1472
1473							If $t->data != $term, # Expected a term
1474							Then
1475	0					0	{PrintOutString "Expected a term";
1476	0					0	Exit(1); # Stop with exit code 1
1477	0					0	};
1478
1479	0					0	my $operands = V(operands); # Number of operands
1480	0					0	$t->find(K(key, 1)); # Key 1 tells us the number of operands
1481							If $t->found > 0, # Found key 1
1482							Then
1483	0					0	{$operands->copy($t->data); # Number of operands
1484							},
1485							Else # Key 1 is missing
1486	0					0	{PrintOutString "Expected at least one operand";
1487	0					0	Exit(1); # Stop with exit code 1
1488	0					0	};
1489
1490							$operands->for(sub # Each operand
1491	0					0	{my ($index, $start, $next, $end) = @_; # Execute body - only $index is used here
1492	0					0	my $i = (1 + $index) * $lexItemWidth; # Base key of this operand's fields: operand n starts at key (n+1) * $lexItemWidth
1493	0					0	$t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
	0					0
1494	0					0	$t->find($i+$lexItemOffset); my $off = V(data)->copy($t->data); # Offset in source
	0					0
1495	0					0	$t->find($i+$lexItemLength); my $len = V(data)->copy($t->data); # Length in source
	0					0
1496
1497	0					0	$b->call; # Indent
1498
1499							If $lex == $term, # Term
1500							Then
1501	0					0	{PrintOutStringNL "Term";
1502	0					0	Inc $depthR; # Increase indentation for sub terms
1503	0					0	$s->call($B, first => $off, $$p{source32}); # Print sub tree referenced by offset field
1504	0					0	Dec $depthR; # Restore existing indentation
1505	0					0	$t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1506							},
1507
1508	0					0	Ef {$lex == $semiColon} # Semicolon
1509							Then
1510	0					0	{PrintOutStringNL "Semicolon";
1511							},
1512
1513							Else # Every other lexical type: print a label and then the item's text from the source
1514							{If $lex == $variable, # Variable
1515							Then
1516	0					0	{PrintOutString "Variable: ";
1517							},
1518
1519	0					0	Ef {$lex == $assign} # Assign
1520							Then
1521	0					0	{PrintOutString "Assign: ";
1522							},
1523
1524	0					0	Ef {$lex == $prefix} # Prefix
1525							Then
1526	0					0	{PrintOutString "Prefix: ";
1527							},
1528
1529	0					0	Ef {$lex == $suffix} # Suffix
1530							Then
1531	0					0	{PrintOutString "Suffix: ";
1532							},
1533
1534	0					0	Ef {$lex == $dyad} # Dyad
1535							Then
1536	0					0	{PrintOutString "Dyad: ";
1537							},
1538
1539	0					0	Ef {$lex == $Ascii} # Ascii
1540							Then
1541	0					0	{PrintOutString "Ascii: ";
1542							},
1543
1544							Else # Anything not matched above is labelled as brackets
1545	0					0	{PrintOutString "Brackets: ";
1546	0					0	};
1547
1548	0					0	$parse->printLexicalItem($$p{source32}, $off, $len); # Print the text of the lexical item (whatever its type) from the source
1549	0					0	PrintOutNL;
1550	0					0	};
1551
1552							If $index == 0, # Operator followed by indented operands
1553							Then
1554	0					0	{Inc $depthR;
1555	0					0	};
1556	0					0	});
1557
1558	0					0	Dec $depthR; # Reset indentation after operands - undoes the Inc made after operand 0
1559	0					0	} [qw(bs first source32)], name => "Nasm::X86::Tree::print"; # Parameters: bs, first, source32
1560
1561	0					0	ClearRegisters $depthR; # Depth starts at zero
1562
1563	0					0	$s->call($parse->arena->bs, first => $parse->parse, $parse->source32); # Print starting from the root of the parse tree
1564
1565	0					0	PopR; # Matches the PushR of the depth register above
1566							} # print
1567
1568							sub dumpParseTree($) # Dump the parse tree.
1569	0			0	1	0	{my ($parse) = @_; # Parse whose arena and tree root (parse) are used
1570	0					0	my $t = $parse->arena->DescribeTree; # Tree descriptor in the parse's arena
1571	0					0	$t->first->copy($parse->parse); # Position the descriptor on the root of the parse tree
1572	0					0	$t->dump; # Dump the tree from that root
1573							}
1574
1575							#D1 Execute # Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
1576
1577							sub lexToSub($$$$) # Map a lexical item to a processing subroutine by registering the subroutine under a key built from the lexical number and the operator name.
1578	0			0	1	0	{my ($parse, $alphabet, $op, $sub) = @_; # Parse, the alphabet name (used as a key, e.g. "dyad"), the operator name written in that alphabet, subroutine definition
1579	0					0	my $a = &lexicalData->{alphabetsOrdered}{$alphabet}; # Ordered code points of the named alphabet
1580	0					0	my $n = $$Lex{lexicals}{$alphabet}{number}; # Number of lexical type
1581	0					0	my %i = map {$$a[$_]=>$_} keys @$a; # Code point => position of that code point in the alphabet
	0					0
1582	0					0	my @b = ($n, map {$i{ord $_}} split //, $op); # Bytes representing the operator name: lexical number then the alphabet position of each character. NOTE(review): a character that is not in the alphabet yields undef here - confirm callers only pass in-alphabet text
	0					0
1583	0					0	my $s = join '', map {chr $_} @b; # String representation
	0					0
1584	0					0	$parse->operators->putSub($s, $sub); # Add the string, subroutine combination to the sub quarks
1585							}
1586
1587							sub dyad($$$) # Define a method for a dyadic operator.
1588	0			0	1	0	{my ($parse, $text, $sub) = @_; # Parse, the name of the operator as a utf8 string, associated subroutine definition
1589	0					0	$parse->lexToSub("dyad", $text, $sub); # Operator name in the dyad alphabet preceded by the dyad lexical number
1590							}
1591
1592							sub assign($$$) # Define a method for an assign operator.
1593	0			0	1	0	{my ($parse, $text, $sub) = @_; # Parse, the name of the operator as a utf8 string, associated subroutine definition
1594	0					0	$parse->lexToSub("assign", $text, $sub); # Operator name in the assign alphabet preceded by the assign lexical number
1595							}
1596
1597							sub prefix($$$) # Define a method for a prefix operator.
1598	0			0	1	0	{my ($parse, $text, $sub) = @_; # Parse, the name of the operator as a utf8 string, associated subroutine definition
1599	0					0	$parse->lexToSub("prefix", $text, $sub); # Operator name in the prefix alphabet preceded by the prefix lexical number
1600							}
1601
1602							sub suffix($$$) # Define a method for a suffix operator.
1603	0			0	1	0	{my ($parse, $text, $sub) = @_; # Parse, the name of the operator as a utf8 string, associated subroutine definition
1604	0					0	$parse->lexToSub("suffix", $text, $sub); # Operator name in the suffix alphabet preceded by the suffix lexical number
1605							# Registered the same way as dyad, assign and prefix so that $text selects the operator; the bare variable lexical number is the key used by variable() and must not be reused here.
1606							}
1607
1608
1609							sub ascii($$) # Define a method for ascii text.
1610	0			0	1	0	{my ($parse, $sub) = @_; # Parse, associated subroutine definition
1611	0					0	my $n = $$Lex{lexicals}{Ascii}{number}; # Lexical number of ascii
1612	0					0	$parse->operators->putSub(chr($n), $sub); # Add the ascii subroutine to the sub quarks under the single character key chr(lexical number)
1613							}
1614
1615							sub semiColon($$) # Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
1616	0			0	1	0	{my ($parse, $sub) = @_; # Parse, associated subroutine definition
1617	0					0	my $n = $$Lex{lexicals}{semiColon}{number}; # Lexical number of semicolon
1618	0					0	$parse->operators->putSub(chr($n), $sub); # Add the semicolon subroutine to the sub quarks
1619	0					0	my $N = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semi colon
1620	0					0	$parse->operators->putSub(chr($N), $sub); # Register the same subroutine for the new line form of the semicolon
1621							}
1622
1623							sub variable($$) # Define a method for a variable.
1624	0			0	1	0	{my ($parse, $sub) = @_; # Parse, associated subroutine definition
1625	0					0	my $n = $$Lex{lexicals}{variable}{number}; # Lexical number of a variable
1626	0					0	$parse->operators->putSub(chr($n), $sub); # Add the variable subroutine to the sub quarks under the single character key chr(lexical number)
1627							}
1628
1629							sub bracket($$$) # Define a method for a bracket operator.
1630	0			0	1	0	{my ($parse, $open, $sub) = @_; # Parse, opening bracket character, associated subroutine
1631	0					0	my $l = &lexicalData; # Lexical tables
1632	0					0	my $s = join '', sort $l->{bracketsOpen}->@*;#, $l->{bracketsClose}->@*; # Bracket alphabet: the opening brackets in sorted order
1633	0					0	my $b = index($s, $open); # Position of the requested bracket in that alphabet, -1 if absent
1634	0	0				0	$b < 0 and confess "No such bracket: $open";
1635	0					0	my $n = $$Lex{lexicals}{OpenBracket}{number}; # Lexical number of open bracket
1636	0					0	$parse->operators->putSub(chr($n).chr($b+1+$l->{bracketsBase}), $sub); # Why plus one? # Add the brackets subroutine to the sub quarks
1637							}
1638
1639							#D1 Alphabets # Translate between alphabets.
1640
1641							sub showAlphabet($) #P Show an alphabet as a string of its characters in order.
1642	0			0	1	0	{my ($alphabet) = @_; # Alphabet name
1643	0					0	my $out; # Characters accumulated so far - stays undefined if the alphabet has no entries
1644	0					0	my $lex = &lexicalData; # Lexical tables
1645	0					0	my $abc = $lex->{alphabetsOrdered}{$alphabet}; # Ordered code points of the named alphabet
1646	0					0	for my $a(@$abc) # Each code point
1647	0					0	{$out .= chr($a); # Append the corresponding character
1648							}
1649							$out
1650	0					0	}
1651
1652							sub asciiToAssignLatin($) # Translate ascii to the corresponding letters in the assign latin alphabet.
1653	0			0	1	0	{my ($in) = @_; # A string of ascii
1654	1			1		16488	$in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧/r; # Positional transliteration: /r returns the translated copy, which is the result of the sub, and leaves $in unchanged
	1					2
	1					16
	0					0
1655							}
1656
1657							sub asciiToAssignGreek($) # Translate ascii to the corresponding letters in the assign greek alphabet.
1658	0			0	1	0	{my ($in) = @_; # A string of ascii
1659	0					0	$in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔/r; # Each listed ascii letter stands in for the greek letter at the same position; /r returns the translated copy and leaves $in unchanged
1660							}
1661
1662							sub asciiToDyadLatin($) # Translate ascii to the corresponding letters in the dyad latin alphabet.
1663	0			0	1	0	{my ($in) = @_; # A string of ascii
1664	0					0	$in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳/r; # Positional transliteration: /r returns the translated copy and leaves $in unchanged
1665							}
1666
1667							sub asciiToDyadGreek($) # Translate ascii to the corresponding letters in the dyad greek alphabet.
1668	0			0	1	0	{my ($in) = @_; # A string of ascii
1669	0					0	$in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚/r; # Each listed ascii letter stands in for the greek letter at the same position; /r returns the translated copy and leaves $in unchanged
1670							}
1671
1672							sub asciiToPrefixLatin($) # Translate ascii to the corresponding letters in the prefix latin alphabet.
1673	0			0	1	0	{my ($in) = @_; # A string of ascii
1674	0					0	$in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛/r; # Positional transliteration: /r returns the translated copy and leaves $in unchanged
1675							}
1676
1677							sub asciiToPrefixGreek($) # Translate ascii to the corresponding letters in the prefix greek alphabet.
1678	0			0	1	0	{my ($in) = @_; # A string of ascii
1679	0					0	$in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎/r; # Each listed ascii letter stands in for the greek letter at the same position; /r returns the translated copy and leaves $in unchanged
1680							}
1681
1682							sub asciiToSuffixLatin($) # Translate ascii to the corresponding letters in the suffix latin alphabet.
1683	0			0	1	0	{my ($in) = @_; # A string of ascii
1684	0					0	$in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯/r; # Positional transliteration: /r returns the translated copy and leaves $in unchanged
1685							}
1686
1687							sub asciiToSuffixGreek($) # Translate ascii to the corresponding letters in the suffix greek alphabet.
1688	0			0	1	0	{my ($in) = @_; # A string of ascii
1689	0					0	$in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂/r; # Each listed ascii letter stands in for the greek letter at the same position; /r returns the translated copy and leaves $in unchanged
1690							}
1691
1692							sub asciiToVariableLatin($) # Translate ascii to the corresponding letters in the variable latin alphabet.
1693	0			0	1	0	{my ($in) = @_; # A string of ascii
1694	0					0	$in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇/r; # Positional transliteration: /r returns the translated copy and leaves $in unchanged
1695							}
1696
1697							sub asciiToVariableGreek($) # Translate ascii to the corresponding letters in the variable greek alphabet.
1698	0			0	1	0	{my ($in) = @_; # A string of ascii
1699	0					0	$in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈/r; # Each listed ascii letter stands in for the greek letter at the same position; /r returns the translated copy and leaves $in unchanged
1700							}
1701
1702							sub asciiToEscaped($) # Translate ascii to the corresponding letters in the escaped ascii alphabet.
1703	0			0	1	0	{my ($in) = @_; # A string of ascii
1704	0					0	$in =~ tr/abcdefghijklmnopqrstuvwxyz/🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩/r; # Only lower case a-z are mapped, every other character passes through unchanged; /r returns the translated copy and leaves $in unchanged
1705							}
1706
1707							sub semiColonChar() # Return the semicolon character: code point 10210 which is U+27E2.
1708							{chr(10210)
1709							}
1710
1711							#d
1712	1			1	0	2	sub lexicalData {do {
1713	1					318	my $a = bless({
1714							alphabetRanges => 14,
1715							alphabets => {
1716							"circledLatinLetter" => "\x{24B6}\x{24B7}\x{24B8}\x{24B9}\x{24BA}\x{24BB}\x{24BC}\x{24BD}\x{24BE}\x{24BF}\x{24C0}\x{24C1}\x{24C2}\x{24C3}\x{24C4}\x{24C5}\x{24C6}\x{24C7}\x{24C8}\x{24C9}\x{24CA}\x{24CB}\x{24CC}\x{24CD}\x{24CE}\x{24CF}\x{24D0}\x{24D1}\x{24D2}\x{24D3}\x{24D4}\x{24D5}\x{24D6}\x{24D7}\x{24D8}\x{24D9}\x{24DA}\x{24DB}\x{24DC}\x{24DD}\x{24DE}\x{24DF}\x{24E0}\x{24E1}\x{24E2}\x{24E3}\x{24E4}\x{24E5}\x{24E6}\x{24E7}\x{24E8}\x{24E9}",
1717							"mathematicalBold" => "\x{1D400}\x{1D401}\x{1D402}\x{1D403}\x{1D404}\x{1D405}\x{1D406}\x{1D407}\x{1D408}\x{1D409}\x{1D40A}\x{1D40B}\x{1D40C}\x{1D40D}\x{1D40E}\x{1D40F}\x{1D410}\x{1D411}\x{1D412}\x{1D413}\x{1D414}\x{1D415}\x{1D416}\x{1D417}\x{1D418}\x{1D419}\x{1D41A}\x{1D41B}\x{1D41C}\x{1D41D}\x{1D41E}\x{1D41F}\x{1D420}\x{1D421}\x{1D422}\x{1D423}\x{1D424}\x{1D425}\x{1D426}\x{1D427}\x{1D428}\x{1D429}\x{1D42A}\x{1D42B}\x{1D42C}\x{1D42D}\x{1D42E}\x{1D42F}\x{1D430}\x{1D431}\x{1D432}\x{1D433}\x{1D6A8}\x{1D6A9}\x{1D6AA}\x{1D6AB}\x{1D6AC}\x{1D6AD}\x{1D6AE}\x{1D6AF}\x{1D6B0}\x{1D6B1}\x{1D6B2}\x{1D6B3}\x{1D6B4}\x{1D6B5}\x{1D6B6}\x{1D6B7}\x{1D6B8}\x{1D6B9}\x{1D6BA}\x{1D6BB}\x{1D6BC}\x{1D6BD}\x{1D6BE}\x{1D6BF}\x{1D6C0}\x{1D6C1}\x{1D6C2}\x{1D6C3}\x{1D6C4}\x{1D6C5}\x{1D6C6}\x{1D6C7}\x{1D6C8}\x{1D6C9}\x{1D6CA}\x{1D6CB}\x{1D6CC}\x{1D6CD}\x{1D6CE}\x{1D6CF}\x{1D6D0}\x{1D6D1}\x{1D6D2}\x{1D6D3}\x{1D6D4}\x{1D6D5}\x{1D6D6}\x{1D6D7}\x{1D6D8}\x{1D6D9}\x{1D6DA}\x{1D6DB}\x{1D6DC}\x{1D6DD}\x{1D6DE}\x{1D6DF}\x{1D6E0}\x{1D6E1}",
1718							"mathematicalBoldFraktur" => "\x{1D56C}\x{1D56D}\x{1D56E}\x{1D56F}\x{1D570}\x{1D571}\x{1D572}\x{1D573}\x{1D574}\x{1D575}\x{1D576}\x{1D577}\x{1D578}\x{1D579}\x{1D57A}\x{1D57B}\x{1D57C}\x{1D57D}\x{1D57E}\x{1D57F}\x{1D580}\x{1D581}\x{1D582}\x{1D583}\x{1D584}\x{1D585}\x{1D586}\x{1D587}\x{1D588}\x{1D589}\x{1D58A}\x{1D58B}\x{1D58C}\x{1D58D}\x{1D58E}\x{1D58F}\x{1D590}\x{1D591}\x{1D592}\x{1D593}\x{1D594}\x{1D595}\x{1D596}\x{1D597}\x{1D598}\x{1D599}\x{1D59A}\x{1D59B}\x{1D59C}\x{1D59D}\x{1D59E}\x{1D59F}",
1719							"mathematicalBoldItalic" => "\x{1D468}\x{1D469}\x{1D46A}\x{1D46B}\x{1D46C}\x{1D46D}\x{1D46E}\x{1D46F}\x{1D470}\x{1D471}\x{1D472}\x{1D473}\x{1D474}\x{1D475}\x{1D476}\x{1D477}\x{1D478}\x{1D479}\x{1D47A}\x{1D47B}\x{1D47C}\x{1D47D}\x{1D47E}\x{1D47F}\x{1D480}\x{1D481}\x{1D482}\x{1D483}\x{1D484}\x{1D485}\x{1D486}\x{1D487}\x{1D488}\x{1D489}\x{1D48A}\x{1D48B}\x{1D48C}\x{1D48D}\x{1D48E}\x{1D48F}\x{1D490}\x{1D491}\x{1D492}\x{1D493}\x{1D494}\x{1D495}\x{1D496}\x{1D497}\x{1D498}\x{1D499}\x{1D49A}\x{1D49B}\x{1D71C}\x{1D71D}\x{1D71E}\x{1D71F}\x{1D720}\x{1D721}\x{1D722}\x{1D723}\x{1D724}\x{1D725}\x{1D726}\x{1D727}\x{1D728}\x{1D729}\x{1D72A}\x{1D72B}\x{1D72C}\x{1D72D}\x{1D72E}\x{1D72F}\x{1D730}\x{1D731}\x{1D732}\x{1D733}\x{1D734}\x{1D735}\x{1D736}\x{1D737}\x{1D738}\x{1D739}\x{1D73A}\x{1D73B}\x{1D73C}\x{1D73D}\x{1D73E}\x{1D73F}\x{1D740}\x{1D741}\x{1D742}\x{1D743}\x{1D744}\x{1D745}\x{1D746}\x{1D747}\x{1D748}\x{1D749}\x{1D74A}\x{1D74B}\x{1D74C}\x{1D74D}\x{1D74E}\x{1D74F}\x{1D750}\x{1D751}\x{1D752}\x{1D753}\x{1D754}\x{1D755}",
1720							"mathematicalBoldScript" => "\x{1D4D0}\x{1D4D1}\x{1D4D2}\x{1D4D3}\x{1D4D4}\x{1D4D5}\x{1D4D6}\x{1D4D7}\x{1D4D8}\x{1D4D9}\x{1D4DA}\x{1D4DB}\x{1D4DC}\x{1D4DD}\x{1D4DE}\x{1D4DF}\x{1D4E0}\x{1D4E1}\x{1D4E2}\x{1D4E3}\x{1D4E4}\x{1D4E5}\x{1D4E6}\x{1D4E7}\x{1D4E8}\x{1D4E9}\x{1D4EA}\x{1D4EB}\x{1D4EC}\x{1D4ED}\x{1D4EE}\x{1D4EF}\x{1D4F0}\x{1D4F1}\x{1D4F2}\x{1D4F3}\x{1D4F4}\x{1D4F5}\x{1D4F6}\x{1D4F7}\x{1D4F8}\x{1D4F9}\x{1D4FA}\x{1D4FB}\x{1D4FC}\x{1D4FD}\x{1D4FE}\x{1D4FF}\x{1D500}\x{1D501}\x{1D502}\x{1D503}",
1721							"mathematicalDouble-struck" => "\x{1D538}\x{1D539}\x{1D53B}\x{1D53C}\x{1D53D}\x{1D53E}\x{1D540}\x{1D541}\x{1D542}\x{1D543}\x{1D544}\x{1D546}\x{1D54A}\x{1D54B}\x{1D54C}\x{1D54D}\x{1D54E}\x{1D54F}\x{1D550}\x{1D552}\x{1D553}\x{1D554}\x{1D555}\x{1D556}\x{1D557}\x{1D558}\x{1D559}\x{1D55A}\x{1D55B}\x{1D55C}\x{1D55D}\x{1D55E}\x{1D55F}\x{1D560}\x{1D561}\x{1D562}\x{1D563}\x{1D564}\x{1D565}\x{1D566}\x{1D567}\x{1D568}\x{1D569}\x{1D56A}\x{1D56B}",
1722							"mathematicalFraktur" => "\x{1D504}\x{1D505}\x{1D507}\x{1D508}\x{1D509}\x{1D50A}\x{1D50D}\x{1D50E}\x{1D50F}\x{1D510}\x{1D511}\x{1D512}\x{1D513}\x{1D514}\x{1D516}\x{1D517}\x{1D518}\x{1D519}\x{1D51A}\x{1D51B}\x{1D51C}\x{1D51E}\x{1D51F}\x{1D520}\x{1D521}\x{1D522}\x{1D523}\x{1D524}\x{1D525}\x{1D526}\x{1D527}\x{1D528}\x{1D529}\x{1D52A}\x{1D52B}\x{1D52C}\x{1D52D}\x{1D52E}\x{1D52F}\x{1D530}\x{1D531}\x{1D532}\x{1D533}\x{1D534}\x{1D535}\x{1D536}\x{1D537}",
1723							"mathematicalItalic" => "\x{1D434}\x{1D435}\x{1D436}\x{1D437}\x{1D438}\x{1D439}\x{1D43A}\x{1D43B}\x{1D43C}\x{1D43D}\x{1D43E}\x{1D43F}\x{1D440}\x{1D441}\x{1D442}\x{1D443}\x{1D444}\x{1D445}\x{1D446}\x{1D447}\x{1D448}\x{1D449}\x{1D44A}\x{1D44B}\x{1D44C}\x{1D44D}\x{1D44E}\x{1D44F}\x{1D450}\x{1D451}\x{1D452}\x{1D453}\x{1D454}\x{1D456}\x{1D457}\x{1D458}\x{1D459}\x{1D45A}\x{1D45B}\x{1D45C}\x{1D45D}\x{1D45E}\x{1D45F}\x{1D460}\x{1D461}\x{1D462}\x{1D463}\x{1D464}\x{1D465}\x{1D466}\x{1D467}\x{1D6E2}\x{1D6E3}\x{1D6E4}\x{1D6E5}\x{1D6E6}\x{1D6E7}\x{1D6E8}\x{1D6E9}\x{1D6EA}\x{1D6EB}\x{1D6EC}\x{1D6ED}\x{1D6EE}\x{1D6EF}\x{1D6F0}\x{1D6F1}\x{1D6F2}\x{1D6F3}\x{1D6F4}\x{1D6F5}\x{1D6F6}\x{1D6F7}\x{1D6F8}\x{1D6F9}\x{1D6FA}\x{1D6FB}\x{1D6FC}\x{1D6FD}\x{1D6FE}\x{1D6FF}\x{1D700}\x{1D701}\x{1D702}\x{1D703}\x{1D704}\x{1D705}\x{1D706}\x{1D707}\x{1D708}\x{1D709}\x{1D70A}\x{1D70B}\x{1D70C}\x{1D70D}\x{1D70E}\x{1D70F}\x{1D710}\x{1D711}\x{1D712}\x{1D713}\x{1D714}\x{1D715}\x{1D716}\x{1D717}\x{1D718}\x{1D719}\x{1D71A}\x{1D71B}",
1724							"mathematicalMonospace" => "\x{1D670}\x{1D671}\x{1D672}\x{1D673}\x{1D674}\x{1D675}\x{1D676}\x{1D677}\x{1D678}\x{1D679}\x{1D67A}\x{1D67B}\x{1D67C}\x{1D67D}\x{1D67E}\x{1D67F}\x{1D680}\x{1D681}\x{1D682}\x{1D683}\x{1D684}\x{1D685}\x{1D686}\x{1D687}\x{1D688}\x{1D689}\x{1D68A}\x{1D68B}\x{1D68C}\x{1D68D}\x{1D68E}\x{1D68F}\x{1D690}\x{1D691}\x{1D692}\x{1D693}\x{1D694}\x{1D695}\x{1D696}\x{1D697}\x{1D698}\x{1D699}\x{1D69A}\x{1D69B}\x{1D69C}\x{1D69D}\x{1D69E}\x{1D69F}\x{1D6A0}\x{1D6A1}\x{1D6A2}\x{1D6A3}",
1725							"mathematicalSans-serif" => "\x{1D5A0}\x{1D5A1}\x{1D5A2}\x{1D5A3}\x{1D5A4}\x{1D5A5}\x{1D5A6}\x{1D5A7}\x{1D5A8}\x{1D5A9}\x{1D5AA}\x{1D5AB}\x{1D5AC}\x{1D5AD}\x{1D5AE}\x{1D5AF}\x{1D5B0}\x{1D5B1}\x{1D5B2}\x{1D5B3}\x{1D5B4}\x{1D5B5}\x{1D5B6}\x{1D5B7}\x{1D5B8}\x{1D5B9}\x{1D5BA}\x{1D5BB}\x{1D5BC}\x{1D5BD}\x{1D5BE}\x{1D5BF}\x{1D5C0}\x{1D5C1}\x{1D5C2}\x{1D5C3}\x{1D5C4}\x{1D5C5}\x{1D5C6}\x{1D5C7}\x{1D5C8}\x{1D5C9}\x{1D5CA}\x{1D5CB}\x{1D5CC}\x{1D5CD}\x{1D5CE}\x{1D5CF}\x{1D5D0}\x{1D5D1}\x{1D5D2}\x{1D5D3}",
1726							"mathematicalSans-serifBold" => "\x{1D5D4}\x{1D5D5}\x{1D5D6}\x{1D5D7}\x{1D5D8}\x{1D5D9}\x{1D5DA}\x{1D5DB}\x{1D5DC}\x{1D5DD}\x{1D5DE}\x{1D5DF}\x{1D5E0}\x{1D5E1}\x{1D5E2}\x{1D5E3}\x{1D5E4}\x{1D5E5}\x{1D5E6}\x{1D5E7}\x{1D5E8}\x{1D5E9}\x{1D5EA}\x{1D5EB}\x{1D5EC}\x{1D5ED}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{1D5F1}\x{1D5F2}\x{1D5F3}\x{1D5F4}\x{1D5F5}\x{1D5F6}\x{1D5F7}\x{1D5F8}\x{1D5F9}\x{1D5FA}\x{1D5FB}\x{1D5FC}\x{1D5FD}\x{1D5FE}\x{1D5FF}\x{1D600}\x{1D601}\x{1D602}\x{1D603}\x{1D604}\x{1D605}\x{1D606}\x{1D607}\x{1D756}\x{1D757}\x{1D758}\x{1D759}\x{1D75A}\x{1D75B}\x{1D75C}\x{1D75D}\x{1D75E}\x{1D75F}\x{1D760}\x{1D761}\x{1D762}\x{1D763}\x{1D764}\x{1D765}\x{1D766}\x{1D767}\x{1D768}\x{1D769}\x{1D76A}\x{1D76B}\x{1D76C}\x{1D76D}\x{1D76E}\x{1D76F}\x{1D770}\x{1D771}\x{1D772}\x{1D773}\x{1D774}\x{1D775}\x{1D776}\x{1D777}\x{1D778}\x{1D779}\x{1D77A}\x{1D77B}\x{1D77C}\x{1D77D}\x{1D77E}\x{1D77F}\x{1D780}\x{1D781}\x{1D782}\x{1D783}\x{1D784}\x{1D785}\x{1D786}\x{1D787}\x{1D788}\x{1D789}\x{1D78A}\x{1D78B}\x{1D78C}\x{1D78D}\x{1D78E}\x{1D78F}",
1727							"mathematicalSans-serifBoldItalic" => "\x{1D63C}\x{1D63D}\x{1D63E}\x{1D63F}\x{1D640}\x{1D641}\x{1D642}\x{1D643}\x{1D644}\x{1D645}\x{1D646}\x{1D647}\x{1D648}\x{1D649}\x{1D64A}\x{1D64B}\x{1D64C}\x{1D64D}\x{1D64E}\x{1D64F}\x{1D650}\x{1D651}\x{1D652}\x{1D653}\x{1D654}\x{1D655}\x{1D656}\x{1D657}\x{1D658}\x{1D659}\x{1D65A}\x{1D65B}\x{1D65C}\x{1D65D}\x{1D65E}\x{1D65F}\x{1D660}\x{1D661}\x{1D662}\x{1D663}\x{1D664}\x{1D665}\x{1D666}\x{1D667}\x{1D668}\x{1D669}\x{1D66A}\x{1D66B}\x{1D66C}\x{1D66D}\x{1D66E}\x{1D66F}\x{1D790}\x{1D791}\x{1D792}\x{1D793}\x{1D794}\x{1D795}\x{1D796}\x{1D797}\x{1D798}\x{1D799}\x{1D79A}\x{1D79B}\x{1D79C}\x{1D79D}\x{1D79E}\x{1D79F}\x{1D7A0}\x{1D7A1}\x{1D7A2}\x{1D7A3}\x{1D7A4}\x{1D7A5}\x{1D7A6}\x{1D7A7}\x{1D7A8}\x{1D7A9}\x{1D7AA}\x{1D7AB}\x{1D7AC}\x{1D7AD}\x{1D7AE}\x{1D7AF}\x{1D7B0}\x{1D7B1}\x{1D7B2}\x{1D7B3}\x{1D7B4}\x{1D7B5}\x{1D7B6}\x{1D7B7}\x{1D7B8}\x{1D7B9}\x{1D7BA}\x{1D7BB}\x{1D7BC}\x{1D7BD}\x{1D7BE}\x{1D7BF}\x{1D7C0}\x{1D7C1}\x{1D7C2}\x{1D7C3}\x{1D7C4}\x{1D7C5}\x{1D7C6}\x{1D7C7}\x{1D7C8}\x{1D7C9}",
1728							"mathematicalSans-serifItalic" => "\x{1D608}\x{1D609}\x{1D60A}\x{1D60B}\x{1D60C}\x{1D60D}\x{1D60E}\x{1D60F}\x{1D610}\x{1D611}\x{1D612}\x{1D613}\x{1D614}\x{1D615}\x{1D616}\x{1D617}\x{1D618}\x{1D619}\x{1D61A}\x{1D61B}\x{1D61C}\x{1D61D}\x{1D61E}\x{1D61F}\x{1D620}\x{1D621}\x{1D622}\x{1D623}\x{1D624}\x{1D625}\x{1D626}\x{1D627}\x{1D628}\x{1D629}\x{1D62A}\x{1D62B}\x{1D62C}\x{1D62D}\x{1D62E}\x{1D62F}\x{1D630}\x{1D631}\x{1D632}\x{1D633}\x{1D634}\x{1D635}\x{1D636}\x{1D637}\x{1D638}\x{1D639}\x{1D63A}\x{1D63B}",
1729							"mathematicalScript" => "\x{1D49C}\x{1D49E}\x{1D49F}\x{1D4A2}\x{1D4A5}\x{1D4A6}\x{1D4A9}\x{1D4AA}\x{1D4AB}\x{1D4AC}\x{1D4AE}\x{1D4AF}\x{1D4B0}\x{1D4B1}\x{1D4B2}\x{1D4B3}\x{1D4B4}\x{1D4B5}\x{1D4B6}\x{1D4B7}\x{1D4B8}\x{1D4B9}\x{1D4BB}\x{1D4BD}\x{1D4BE}\x{1D4BF}\x{1D4C0}\x{1D4C1}\x{1D4C2}\x{1D4C3}\x{1D4C5}\x{1D4C6}\x{1D4C7}\x{1D4C8}\x{1D4C9}\x{1D4CA}\x{1D4CB}\x{1D4CC}\x{1D4CD}\x{1D4CE}\x{1D4CF}",
1730							"negativeCircledLatinLetter" => "\x{1F150}\x{1F151}\x{1F152}\x{1F153}\x{1F154}\x{1F155}\x{1F156}\x{1F157}\x{1F158}\x{1F159}\x{1F15A}\x{1F15B}\x{1F15C}\x{1F15D}\x{1F15E}\x{1F15F}\x{1F160}\x{1F161}\x{1F162}\x{1F163}\x{1F164}\x{1F165}\x{1F166}\x{1F167}\x{1F168}\x{1F169}",
1731							"negativeSquaredLatinLetter" => "\x{1F170}\x{1F171}\x{1F172}\x{1F173}\x{1F174}\x{1F175}\x{1F176}\x{1F177}\x{1F178}\x{1F179}\x{1F17A}\x{1F17B}\x{1F17C}\x{1F17D}\x{1F17E}\x{1F17F}\x{1F180}\x{1F181}\x{1F182}\x{1F183}\x{1F184}\x{1F185}\x{1F186}\x{1F187}\x{1F188}\x{1F189}",
1732							"planck" => "\x{210E}",
1733							"semiColon" => "\x{27E2}",
1734							"squaredLatinLetter" => "\x{1F130}\x{1F131}\x{1F132}\x{1F133}\x{1F134}\x{1F135}\x{1F136}\x{1F137}\x{1F138}\x{1F139}\x{1F13A}\x{1F13B}\x{1F13C}\x{1F13D}\x{1F13E}\x{1F13F}\x{1F140}\x{1F141}\x{1F142}\x{1F143}\x{1F144}\x{1F145}\x{1F146}\x{1F147}\x{1F148}\x{1F149}\x{1F1A5}",
1735							},
1736							alphabetsOrdered => {
1737							Ascii => [0 .. 127, 127312 .. 127337],
1738							assign => [8462, 119860 .. 119911, 120546 .. 120603],
1739							dyad => [119808 .. 119859, 120488 .. 120545],
1740							prefix => [119912 .. 119963, 120604 .. 120661],
1741							semiColon => [10210],
1742							suffix => [120380 .. 120431, 120720 .. 120777],
1743							variable => [120276 .. 120327, 120662 .. 120719],
1744							},
1745							brackets => 16,
1746							bracketsBase => 16,
1747							bracketsClose => [
1748							"\x{2309}",
1749							"\x{230B}",
1750							"\x{232A}",
1751							"\x{2769}",
1752							"\x{276B}",
1753							"\x{276D}",
1754							"\x{276F}",
1755							"\x{2771}",
1756							"\x{2773}",
1757							"\x{2775}",
1758							"\x{27E7}",
1759							"\x{27E9}",
1760							"\x{27EB}",
1761							"\x{27ED}",
1762							"\x{27EF}",
1763							"\x{2984}",
1764							"\x{2986}",
1765							"\x{2988}",
1766							"\x{298A}",
1767							"\x{298C}",
1768							"\x{298E}",
1769							"\x{2990}",
1770							"\x{2992}",
1771							"\x{2994}",
1772							"\x{2996}",
1773							"\x{2998}",
1774							"\x{29FD}",
1775							"\x{2E29}",
1776							"\x{3009}",
1777							"\x{300B}",
1778							"\x{3011}",
1779							"\x{3015}",
1780							"\x{3017}",
1781							"\x{3019}",
1782							"\x{301B}",
1783							"\x{FD3F}",
1784							"\x{FF09}",
1785							"\x{FF60}",
1786							],
1787							bracketsHigh => [
1788							"0x1300230b",
1789							"0x1500232a",
1790							"0x23002775",
1791							"0x2d0027ef",
1792							"0x43002998",
1793							"0x450029fd",
1794							"0x47002e29",
1795							"0x4b00300b",
1796							"0x4d003011",
1797							"0x5500301b",
1798							"0x5700fd3f",
1799							"0x5900ff09",
1800							"0x5b00ff60",
1801							0,
1802							0,
1803							0,
1804							],
1805							bracketsLow => [
1806							"0x10002308",
1807							"0x14002329",
1808							"0x16002768",
1809							"0x240027e6",
1810							"0x2e002983",
1811							"0x440029fc",
1812							"0x46002e28",
1813							"0x48003008",
1814							"0x4c003010",
1815							"0x4e003014",
1816							"0x5600fd3e",
1817							"0x5800ff08",
1818							"0x5a00ff5f",
1819							0,
1820							0,
1821							0,
1822							],
1823							bracketsOpen => [
1824							"\x{2308}",
1825							"\x{230A}",
1826							"\x{2329}",
1827							"\x{2768}",
1828							"\x{276A}",
1829							"\x{276C}",
1830							"\x{276E}",
1831							"\x{2770}",
1832							"\x{2772}",
1833							"\x{2774}",
1834							"\x{27E6}",
1835							"\x{27E8}",
1836							"\x{27EA}",
1837							"\x{27EC}",
1838							"\x{27EE}",
1839							"\x{2983}",
1840							"\x{2985}",
1841							"\x{2987}",
1842							"\x{2989}",
1843							"\x{298B}",
1844							"\x{298D}",
1845							"\x{298F}",
1846							"\x{2991}",
1847							"\x{2993}",
1848							"\x{2995}",
1849							"\x{2997}",
1850							"\x{29FC}",
1851							"\x{2E28}",
1852							"\x{3008}",
1853							"\x{300A}",
1854							"\x{3010}",
1855							"\x{3014}",
1856							"\x{3016}",
1857							"\x{3018}",
1858							"\x{301A}",
1859							"\x{FD3E}",
1860							"\x{FF08}",
1861							"\x{FF5F}",
1862							],
1863							lexicalAlpha => {
1864							"" => [
1865							"circledLatinLetter",
1866							"mathematicalBoldFraktur",
1867							"mathematicalBoldScript",
1868							"mathematicalDouble-struck",
1869							"mathematicalFraktur",
1870							"mathematicalMonospace",
1871							"mathematicalSans-serif",
1872							"mathematicalSans-serifItalic",
1873							"mathematicalScript",
1874							"negativeSquaredLatinLetter",
1875							"semiColon",
1876							"squaredLatinLetter",
1877							],
1878							"Ascii" => ["negativeCircledLatinLetter"],
1879							"assign" => ["mathematicalItalic", "planck"],
1880							"CloseBracket" => [],
1881							"dyad" => ["mathematicalBold"],
1882							"OpenBracket" => [],
1883							"prefix" => ["mathematicalBoldItalic"],
1884							"semiColon" => [],
1885							"suffix" => ["mathematicalSans-serifBoldItalic"],
1886							"term" => [],
1887							"variable" => ["mathematicalSans-serifBold"],
1888							},
1889							lexicalHigh => [
1890							127,
1891							8462,
1892							10210,
1893							119859,
1894							16897127,
1895							119963,
1896							120327,
1897							120431,
1898							872535777,
1899							889313051,
1900							872535893,
1901							872535951,
1902							872536009,
1903							2147610985,
1904							0,
1905							0,
1906							],
1907							lexicalLow => [
1908							33554432,
1909							83894542,
1910							134227938,
1911							50451456,
1912							84005940,
1913							67228776,
1914							100783572,
1915							117560892,
1916							50452136,
1917							84006626,
1918							67229468,
1919							100783958,
1920							117561232,
1921							33681744,
1922							0,
1923							0,
1924							],
1925							lexicals => bless({
1926							Ascii => bless({ letter => "a", like => "v", name => "Ascii", number => 2 }, "Unisyn::Parse::Lexical::Constant"),
1927							assign => bless({ letter => "a", like => "a", name => "assign", number => 5 }, "Unisyn::Parse::Lexical::Constant"),
1928							CloseBracket => bless({ letter => "B", like => "B", name => "CloseBracket", number => 1 }, "Unisyn::Parse::Lexical::Constant"),
1929							dyad => bless({ letter => "d", like => "d", name => "dyad", number => 3 }, "Unisyn::Parse::Lexical::Constant"),
1930							empty => bless({ letter => "e", like => "e", name => "empty", number => 10 }, "Unisyn::Parse::Lexical::Constant"),
1931							NewLineSemiColon => bless({ letter => "N", like => undef, name => "NewLineSemiColon", number => 12 }, "Unisyn::Parse::Lexical::Constant"),
1932							OpenBracket => bless({ letter => "b", like => "b", name => "OpenBracket", number => 0 }, "Unisyn::Parse::Lexical::Constant"),
1933							prefix => bless({ letter => "p", like => "p", name => "prefix", number => 4 }, "Unisyn::Parse::Lexical::Constant"),
1934							semiColon => bless({ letter => "s", like => "s", name => "semiColon", number => 8 }, "Unisyn::Parse::Lexical::Constant"),
1935							suffix => bless({ letter => "q", like => "q", name => "suffix", number => 7 }, "Unisyn::Parse::Lexical::Constant"),
1936							term => bless({ letter => "t", like => "t", name => "term", number => 9 }, "Unisyn::Parse::Lexical::Constant"),
1937							variable => bless({ letter => "v", like => "v", name => "variable", number => 6 }, "Unisyn::Parse::Lexical::Constant"),
1938							WhiteSpace => bless({ letter => "W", like => undef, name => "WhiteSpace", number => 11 }, "Unisyn::Parse::Lexical::Constant"),
1939							}, "Unisyn::Parse::Lexicals"),
1940							sampleLexicals => {
1941							A => [
1942							100663296,
1943							83886080,
1944							33554497,
1945							33554464,
1946							33554497,
1947							33554464,
1948							33554464,
1949							33554464,
1950							33554464,
1951							],
1952							Adv => [
1953							100663296,
1954							83886080,
1955							33554497,
1956							33554464,
1957							33554497,
1958							33554464,
1959							33554464,
1960							33554464,
1961							33554464,
1962							50331648,
1963							100663296,
1964							],
1965							BB => [
1966							0,
1967							0,
1968							0,
1969							0,
1970							0,
1971							0,
1972							0,
1973							0,
1974							100663296,
1975							16777216,
1976							16777216,
1977							16777216,
1978							16777216,
1979							16777216,
1980							16777216,
1981							16777216,
1982							16777216,
1983							],
1984							brackets => [
1985							100663296,
1986							83886080,
1987							0,
1988							0,
1989							0,
1990							100663296,
1991							16777216,
1992							16777216,
1993							50331648,
1994							0,
1995							100663296,
1996							16777216,
1997							16777216,
1998							134217728,
1999							],
2000							bvB => [0, 100663296, 16777216],
2001							nosemi => [
2002							100663296,
2003							83886080,
2004							0,
2005							0,
2006							0,
2007							100663296,
2008							16777216,
2009							16777216,
2010							50331648,
2011							0,
2012							100663296,
2013							16777216,
2014							16777216,
2015							],
2016							ppppvdvdvqqqq => [
2017							0,
2018							0,
2019							0,
2020							100663296,
2021							83886080,
2022							100663296,
2023							50331648,
2024							0,
2025							100663296,
2026							50331648,
2027							100663296,
2028							16777216,
2029							134217728,
2030							100663296,
2031							83886080,
2032							100663296,
2033							50331648,
2034							100663296,
2035							16777216,
2036							16777216,
2037							16777216,
2038							],
2039							s => [100663296, 134217728, 100663296],
2040							s1 => [
2041							100663296,
2042							83886080,
2043							33554442,
2044							33554464,
2045							33554464,
2046							33554497,
2047							33554442,
2048							33554464,
2049							33554464,
2050							33554464,
2051							],
2052							v => [100663296],
2053							vav => [100663296, 83886080, 100663296],
2054							vavav => [100663296, 83886080, 100663296, 83886080, 100663296],
2055							vnsvs => [
2056							100663296,
2057							33554442,
2058							33554464,
2059							33554464,
2060							33554464,
2061							100663296,
2062							33554464,
2063							33554464,
2064							33554464,
2065							],
2066							vnv => [100663296, 33554442, 100663296],
2067							vnvs => [
2068							100663296,
2069							33554442,
2070							100663296,
2071							33554464,
2072							33554464,
2073							33554464,
2074							33554464,
2075							],
2076							ws => [
2077							100663296,
2078							83886080,
2079							0,
2080							0,
2081							0,
2082							100663296,
2083							16777216,
2084							16777216,
2085							50331648,
2086							0,
2087							100663296,
2088							16777216,
2089							16777216,
2090							134217728,
2091							100663296,
2092							83886080,
2093							0,
2094							100663296,
2095							50331648,
2096							100663296,
2097							16777216,
2098							134217728,
2099							],
2100							wsa => [
2101							100663296,
2102							83886080,
2103							0,
2104							0,
2105							0,
2106							100663296,
2107							16777216,
2108							16777216,
2109							50331648,
2110							0,
2111							100663296,
2112							16777216,
2113							16777216,
2114							134217728,
2115							100663296,
2116							83886080,
2117							33554497,
2118							50331648,
2119							100663296,
2120							134217728,
2121							],
2122							},
2123							sampleText => {
2124							A => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 ",
2125							Adv => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 \x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D603}\x{1D5EE}\x{1D5FF}",
2126							BB => "\x{230A}\x{2329}\x{2768}\x{276A}\x{276C}\x{276E}\x{2770}\x{2772}\x{1D5EE}\x{2773}\x{2771}\x{276F}\x{276D}\x{276B}\x{2769}\x{232A}\x{230B}",
2127							brackets => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}",
2128							bvB => "\x{2329}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{232A}",
2129							nosemi => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}",
2130							ppppvdvdvqqqq => "\x{1D482}\x{2774}\x{1D483}\x{27E6}\x{1D484}\x{27E8}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}\x{1D485}\x{1D5EF}\x{1D659}\x{1D42D}\x{1D422}\x{1D426}\x{1D41E}\x{1D42C}\x{27EA}\x{1D5F0}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F1}\x{27EB}\x{27E2}\x{1D5F2}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{1D5F3}\x{1D42C}\x{1D42E}\x{1D41B}\x{1D5F4}\x{1D65D}\x{27E9}\x{1D658}\x{27E7}\x{1D657}\x{2775}\x{1D656}",
2131							s => "\x{1D5EE}\x{27E2}\x{1D5EF}",
2132							s1 => "\x{1D5EE}\x{1D44E}\n \n ",
2133							v => "\x{1D5EE}",
2134							vav => "\x{1D5EE}\x{1D44E}\x{1D5EF}",
2135							vavav => "\x{1D5EE}\x{1D44E}\x{1D5EF}\x{1D44E}\x{1D5F0}",
2136							vnsvs => "\x{1D5EE}\x{1D5EE}\n \x{1D5EF}\x{1D5EF} ",
2137							vnv => "\x{1D5EE}\n\x{1D5EF}",
2138							vnvs => "\x{1D5EE}\n\x{1D5EF} ",
2139							ws => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{276C}\x{1D5EF}\x{1D5EF}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{276D}\x{27E2}",
2140							wsa => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}some--ascii--text\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{27E2}",
2141							},
2142							semiColon => "\x{27E2}",
2143							separator => "\x{205F}",
2144							structure => bless({
2145							codes => bless({
2146							a => bless({
2147							letter => "a",
2148							name => "assignment operator",
2149							next => "bpv",
2150							short => "assign",
2151							}, "Tree::Term::LexicalCode"),
2152							b => bless({
2153							letter => "b",
2154							name => "opening parenthesis",
2155							next => "bBpsv",
2156							short => "OpenBracket",
2157							}, "Tree::Term::LexicalCode"),
2158							B => bless({
2159							letter => "B",
2160							name => "closing parenthesis",
2161							next => "aBdqs",
2162							short => "CloseBracket",
2163							}, "Tree::Term::LexicalCode"),
2164							d => bless({ letter => "d", name => "dyadic operator", next => "bpv", short => "dyad" }, "Tree::Term::LexicalCode"),
2165							p => bless({ letter => "p", name => "prefix operator", next => "bpv", short => "prefix" }, "Tree::Term::LexicalCode"),
2166							q => bless({
2167							letter => "q",
2168							name => "suffix operator",
2169							next => "aBdqs",
2170							short => "suffix",
2171							}, "Tree::Term::LexicalCode"),
2172							s => bless({ letter => "s", name => "semi-colon", next => "bBpsv", short => "semiColon" }, "Tree::Term::LexicalCode"),
2173							t => bless({ letter => "t", name => "term", next => "aBdqs", short => "term" }, "Tree::Term::LexicalCode"),
2174							v => bless({ letter => "v", name => "variable", next => "aBdqs", short => "variable" }, "Tree::Term::LexicalCode"),
2175							}, "Tree::Term::Codes"),
2176							first => "bpsv",
2177							last => "Bqsv",
2178							}, "Tree::Term::LexicalStructure"),
2179							treeTermLexicals => 'fix',
2180							}, "Unisyn::Parse::Lexical::Tables");
2181	1					7	$a->{treeTermLexicals} = $a->{structure}{codes};
2182	1					3	$a;
2183							}}
2184
2185							#-------------------------------------------------------------------------------
2186							# Export - eeee
2187							#-------------------------------------------------------------------------------
2188
2189	1			1		11519	use Exporter qw(import);
	1					2
	1					89
2190
2191	1			1		7	use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
	1					1
	1					533
2192
2193							@ISA = qw(Exporter); # Inherit from Exporter
2194							@EXPORT = qw(); # Nothing is exported by default
2195							@EXPORT_OK = qw(); # Nothing is exportable on request
2196							%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); # The :all tag is the union of both lists, which is currently empty
2197
2198							# podDocumentation
2199							=pod
2200
2201							=encoding utf-8
2202
2203							=head1 Name
2204
2205							Unisyn::Parse - Parse a Unisyn expression.
2206
2207							=head1 Synopsis
2208
2209							Parse the B<Unisyn> expression:
2210
2211							𝒂 ❴ 𝒃 ⟦𝒄⟨ 𝗮 𝑒𝑞𝑢𝑎𝑙𝑠 𝒅 𝗯 𝙙 𝐭𝐢𝐦𝐞𝐬 ⟪𝗰 𝐩𝐥𝐮𝐬 𝗱⟫⟢ 𝗲 𝑎𝑠𝑠𝑖𝑔𝑛 𝗳 𝐬𝐮𝐛 𝗴 𝙝⟩ 𝙘 ⟧ 𝙗 ❵ 𝙖
2212
2213							To get:
2214
2215							Suffix: 𝙖
2216							Term
2217							Prefix: 𝒂
2218							Term
2219							Brackets: ⦇⦈
2220							Term
2221							Term
2222							Suffix: 𝙗
2223							Term
2224							Prefix: 𝒃
2225							Term
2226							Brackets: ⦋⦌
2227							Term
2228							Term
2229							Suffix: 𝙘
2230							Term
2231							Prefix: 𝒄
2232							Term
2233							Brackets: ⦏⦐
2234							Term
2235							Term
2236							Semicolon
2237							Term
2238							Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2239							Term
2240							Variable: 𝗮
2241							Term
2242							Dyad: 𝐭𝐢𝐦𝐞𝐬
2243							Term
2244							Suffix: 𝙙
2245							Term
2246							Prefix: 𝒅
2247							Term
2248							Variable: 𝗯
2249							Term
2250							Brackets: ⦓⦔
2251							Term
2252							Term
2253							Dyad: 𝐩𝐥𝐮𝐬
2254							Term
2255							Variable: 𝗰
2256							Term
2257							Variable: 𝗱
2258							Term
2259							Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2260							Term
2261							Variable: 𝗲
2262							Term
2263							Dyad: 𝐬𝐮𝐛
2264							Term
2265							Variable: 𝗳
2266							Term
2267							Suffix: 𝙝
2268							Term
2269							Variable: 𝗴
2270
2271							Then traverse the parse tree printing the type of each node:
2272
2273							variable
2274							variable
2275							prefix_d
2276							suffix_d
2277							variable
2278							variable
2279							plus
2280							times
2281							equals
2282							variable
2283							variable
2284							variable
2285							sub
2286							assign
2287							semiColon
2288							brackets_3
2289							prefix_c
2290							suffix_c
2291							brackets_2
2292							prefix_b
2293							suffix_b
2294							brackets_1
2295							prefix_a
2296							suffix_a
2297
2298							=head1 Description
2299
2300							Parse a Unisyn expression.
2301
2302
2303							Version "20211008".
2304
2305
2306							The following sections describe the methods in each functional area of this
2307							module. For an alphabetic listing of all methods by name see L<Index|/Index>.
2308
2309
2310
2311							=head1 Create
2312
2313							Create a Unisyn parse of a utf8 string.
2314
2315							=head2 create($address, %options)
2316
2317							Create a new unisyn parse from a utf8 string.
2318
2319							Parameter Description
2320							1 $address Address of a zero terminated utf8 source string to parse as a variable
2321							2 %options Parse options.
2322
2323							B<Example:>
2324
2325
2326
2327							create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2328
2329
2330							ok Assemble(debug => 0, eq => <<END);
2331							Assign: 𝑎
2332							Term
2333							Variable: 𝗮
2334							Term
2335							Variable: 𝗯
2336							END
2337
2338
2339							=head1 Parse
2340
2341							Parse Unisyn expressions
2342
2343							=head1 Traverse
2344
2345							Traverse the parse tree
2346
2347							=head2 traverseParseTree($parse)
2348
2349							Traverse the terms in the parse tree in post order and call the operator subroutine associated with each term.
2350
2351							Parameter Description
2352							1 $parse Parse tree
2353
2354							B<Example:>
2355
2356
2357							my $s = Rutf8 $Lex->{sampleText}{Adv}; # Ascii
2358							my $p = create K(address, $s), operators => \&printOperatorSequence;
2359
2360							K(address, $s)->printOutZeroString;
2361							$p->dumpParseTree;
2362							$p->print;
2363
2364							$p->traverseParseTree; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2365
2366
2367							Assemble(debug => 0, eq => <<END);
2368							𝗮𝗮𝑒𝑞𝑢𝑎𝑙𝑠abc 123 𝐩𝐥𝐮𝐬𝘃𝗮𝗿
2369							Tree at: 0000 0000 0000 10D8 length: 0000 0000 0000 000B
2370							Keys: 0000 1118 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2371							Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0F18 0000 0009 0000 0AD8 0000 0009 0000 0004 0000 0006 0000 0002 0000 0005 0041 2A7C 0000 0003 0000 0009
2372							Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2373							index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2374							index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2375							index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 2A7C
2376							index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0005
2377							index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0002
2378							index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0006
2379							index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0004
2380							index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2381							index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0AD8 subTree
2382							index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2383							index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0F18 subTree
2384							Tree at: 0000 0000 0000 0AD8 length: 0000 0000 0000 0007
2385							Keys: 0000 0B18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2386							Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0000 0000 0006 0041 1B34 0000 0001 0000 0009
2387							Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2388							index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2389							index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2390							index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 1B34
2391							index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2392							index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0000
2393							index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0002
2394							index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0000
2395							end
2396							Tree at: 0000 0000 0000 0F18 length: 0000 0000 0000 000B
2397							Keys: 0000 0F58 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2398							Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0DD8 0000 0009 0000 0C18 0000 0009 0000 0003 0000 0004 0000 0013 0000 0003 0041 3220 0000 0003 0000 0009
2399							Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2400							index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2401							index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2402							index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 3220
2403							index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0003
2404							index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0013
2405							index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0004
2406							index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0003
2407							index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2408							index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0C18 subTree
2409							index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2410							index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0DD8 subTree
2411							Tree at: 0000 0000 0000 0C18 length: 0000 0000 0000 0007
2412							Keys: 0000 0C58 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2413							Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0001 0000 0007 0000 0008 0000 0002 0041 5806 0000 0001 0000 0009
2414							Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2415							index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2416							index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2417							index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 5806
2418							index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0002
2419							index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0008
2420							index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0007
2421							index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0001
2422							end
2423							Tree at: 0000 0000 0000 0DD8 length: 0000 0000 0000 0007
2424							Keys: 0000 0E18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2425							Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0003 0000 0017 0000 0006 0041 1B34 0000 0001 0000 0009
2426							Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2427							index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2428							index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2429							index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 1B34
2430							index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2431							index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0017
2432							index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0003
2433							index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0002
2434							end
2435							end
2436							end
2437							Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2438							Term
2439							Variable: 𝗮𝗮
2440							Term
2441							Dyad: 𝐩𝐥𝐮𝐬
2442							Term
2443							Ascii: abc 123
2444							Term
2445							Variable: 𝘃𝗮𝗿
2446							variable
2447							ascii
2448							variable
2449							plus
2450							equals
2451							END
2452
2453							my $s = Rutf8 $Lex->{sampleText}{ws};
2454							my $p = create (K(address, $s), operators => \&printOperatorSequence);
2455
2456							K(address, $s)->printOutZeroString; # Print input string
2457							$p->print; # Print parse
2458
2459							$p->traverseParseTree; # Traverse tree printing terms # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2460
2461
2462							Assemble(debug => 0, eq => <<END);
2463							𝗮𝑎𝑠𝑠𝑖𝑔𝑛⌊〈❨𝗯𝗽❩〉𝐩𝐥𝐮𝐬❪𝘀𝗰❫⌋⟢𝗮𝗮𝑎𝑠𝑠𝑖𝑔𝑛❬𝗯𝗯𝐩𝐥𝐮𝐬𝗰𝗰❭⟢
2464							Semicolon
2465							Term
2466							Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2467							Term
2468							Variable: 𝗮
2469							Term
2470							Brackets: ⌊⌋
2471							Term
2472							Term
2473							Dyad: 𝐩𝐥𝐮𝐬
2474							Term
2475							Brackets: ❨❩
2476							Term
2477							Term
2478							Brackets: ❬❭
2479							Term
2480							Term
2481							Variable: 𝗯𝗽
2482							Term
2483							Brackets: ❰❱
2484							Term
2485							Term
2486							Variable: 𝘀𝗰
2487							Term
2488							Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2489							Term
2490							Variable: 𝗮𝗮
2491							Term
2492							Brackets: ❴❵
2493							Term
2494							Term
2495							Dyad: 𝐩𝐥𝐮𝐬
2496							Term
2497							Variable: 𝗯𝗯
2498							Term
2499							Variable: 𝗰𝗰
2500							variable
2501							variable
2502							variable
2503							plus
2504							assign
2505							variable
2506							variable
2507							variable
2508							plus
2509							assign
2510							semiColon
2511							END
2512
2513
2514							=head1 Print
2515
2516							Print a parse tree
2517
2518							=head2 print($parse)
2519
2520							Print a parse tree.
2521
2522							Parameter Description
2523							1 $parse Parse tree
2524
2525							B<Example:>
2526
2527
2528
2529							create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2530
2531
2532							ok Assemble(debug => 0, eq => <<END);
2533							Assign: 𝑎
2534							Term
2535							Variable: 𝗮
2536							Term
2537							Variable: 𝗯
2538							END
2539
2540
2541							=head2 dumpParseTree($parse)
2542
2543							Dump the parse tree.
2544
2545							Parameter Description
2546							1 $parse Parse tree
2547
2548							=head1 Execute
2549
2550							Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
2551
2552							=head2 lexToSub($parse, $alphabet, $op, $sub)
2553
2554							Map a lexical item to a processing subroutine.
2555
2556							Parameter Description
2557							1 $parse Sub quarks
2558							2 $alphabet The alphabet number
2559							3 $op The operator name in that alphabet
2560							4 $sub Subroutine definition
2561
2562							=head2 dyad($parse, $text, $sub)
2563
2564							Define a method for a dyadic operator.
2565
2566							Parameter Description
2567							1 $parse Sub quarks
2568							2 $text The name of the operator as a utf8 string
2569							3 $sub Associated subroutine definition
2570
2571							=head2 assign($parse, $text, $sub)
2572
2573							Define a method for an assign operator.
2574
2575							Parameter Description
2576							1 $parse Sub quarks
2577							2 $text The name of the operator as a utf8 string
2578							3 $sub Associated subroutine definition
2579
2580							=head2 prefix($parse, $text, $sub)
2581
2582							Define a method for a prefix operator.
2583
2584							Parameter Description
2585							1 $parse Sub quarks
2586							2 $text The name of the operator as a utf8 string
2587							3 $sub Associated subroutine definition
2588
2589							=head2 suffix($parse, $text, $sub)
2590
2591							Define a method for a suffix operator.
2592
2593							Parameter Description
2594							1 $parse Sub quarks
2595							2 $text The name of the operator as a utf8 string
2596							3 $sub Associated subroutine definition
2597
2598							=head2 ascii($parse, $sub)
2599
2600							Define a method for ascii text.
2601
2602							Parameter Description
2603							1 $parse Sub quarks
2604							2 $sub Associated subroutine definition
2605
2606							=head2 semiColon($parse, $sub)
2607
2608							Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
2609
2610							Parameter Description
2611							1 $parse Sub quarks
2612							2 $sub Associated subroutine definition
2613
2614							=head2 variable($parse, $sub)
2615
2616							Define a method for a variable.
2617
2618							Parameter Description
2619							1 $parse Sub quarks
2620							2 $sub Associated subroutine definition
2621
2622							=head2 bracket($parse, $open, $sub)
2623
2624							Define a method for a bracket operator.
2625
2626							Parameter Description
2627							1 $parse Sub quarks
2628							2 $open Opening parenthesis
2629							3 $sub Associated subroutine
2630
2631							=head1 Alphabets
2632
2633							Translate between alphabets.
2634
2635							=head2 asciiToAssignLatin($in)
2636
2637							Translate ascii to the corresponding letters in the assign latin alphabet.
2638
2639							Parameter Description
2640							1 $in A string of ascii
2641
2642							=head2 asciiToAssignGreek($in)
2643
2644							Translate ascii to the corresponding letters in the assign greek alphabet.
2645
2646							Parameter Description
2647							1 $in A string of ascii
2648
2649							=head2 asciiToDyadLatin($in)
2650
2651							Translate ascii to the corresponding letters in the dyad latin alphabet.
2652
2653							Parameter Description
2654							1 $in A string of ascii
2655
2656							=head2 asciiToDyadGreek($in)
2657
2658							Translate ascii to the corresponding letters in the dyad greek alphabet.
2659
2660							Parameter Description
2661							1 $in A string of ascii
2662
2663							=head2 asciiToPrefixLatin($in)
2664
2665							Translate ascii to the corresponding letters in the prefix latin alphabet.
2666
2667							Parameter Description
2668							1 $in A string of ascii
2669
2670							=head2 asciiToPrefixGreek($in)
2671
2672							Translate ascii to the corresponding letters in the prefix greek alphabet.
2673
2674							Parameter Description
2675							1 $in A string of ascii
2676
2677							=head2 asciiToSuffixLatin($in)
2678
2679							Translate ascii to the corresponding letters in the suffix latin alphabet.
2680
2681							Parameter Description
2682							1 $in A string of ascii
2683
2684							=head2 asciiToSuffixGreek($in)
2685
2686							Translate ascii to the corresponding letters in the suffix greek alphabet.
2687
2688							Parameter Description
2689							1 $in A string of ascii
2690
2691							=head2 asciiToVariableLatin($in)
2692
2693							Translate ascii to the corresponding letters in the variable latin alphabet.
2694
2695							Parameter Description
2696							1 $in A string of ascii
2697
2698							=head2 asciiToVariableGreek($in)
2699
2700							Translate ascii to the corresponding letters in the variable greek alphabet.
2701
2702							Parameter Description
2703							1 $in A string of ascii
2704
2705							=head2 asciiToEscaped($in)
2706
2707							Translate ascii to the corresponding letters in the escaped ascii alphabet.
2708
2709							Parameter Description
2710							1 $in A string of ascii
2711
2712							=head2 semiColonChar()
2713
2714							Return the character used to represent a semicolon.
2715
2716
2717							=head2 printOperatorSequence($parse)
2718
2719							Print the operator calling sequence.
2720
2721							Parameter Description
2722							1 $parse Parse
2723
2724							=head2 executeOperator($parse)
2725
2726							Print the operator calling sequence.
2727
2728							Parameter Description
2729							1 $parse Parse
2730
2731
2732							=head1 Hash Definitions
2733
2734
2735
2736
2737							=head2 Unisyn::Parse Definition
2738
2739
2740							Description of parse
2741
2742
2743
2744
2745							=head3 Output fields
2746
2747
2748							=head4 address8
2749
2750							Address of source string as utf8
2751
2752							=head4 arena
2753
2754							Arena containing tree
2755
2756							=head4 fails
2757
2758							Number of failures encountered in this parse
2759
2760							=head4 operators
2761
2762							Methods implementing each lexical operator
2763
2764							=head4 parse
2765
2766							Offset to the head of the parse tree
2767
2768							=head4 quarks
2769
2770							Quarks representing the strings used in this parse
2771
2772							=head4 size8
2773
2774							Size of source string as utf8
2775
2776							=head4 source32
2777
2778							Source text as utf32
2779
2780							=head4 sourceLength32
2781
2782							Length of utf32 string
2783
2784							=head4 sourceSize32
2785
2786							Size of utf32 allocation
2787
2788
2789
2790							=head1 Private Methods
2791
2792							=head2 getAlpha($register, $address, $index)
2793
2794							Load the position of a lexical item in its alphabet from the current character.
2795
2796							Parameter Description
2797							1 $register Register to load
2798							2 $address Address of start of string
2799							3 $index Index into string
2800
2801							=head2 getLexicalCode($register, $address, $index)
2802
2803							Load the lexical code of the current character in memory into the specified register.
2804
2805							Parameter Description
2806							1 $register Register to load
2807							2 $address Address of start of string
2808							3 $index Index into string
2809
2810							=head2 putLexicalCode($register, $address, $index, $code)
2811
2812							Put the specified lexical code into the current character in memory.
2813
2814							Parameter Description
2815							1 $register Register used to load code
2816							2 $address Address of string
2817							3 $index Index into string
2818							4 $code Code to put
2819
2820							=head2 loadCurrentChar()
2821
2822							Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
2823
2824
2825							=head2 checkStackHas($depth)
2826
2827							Check that we have at least the specified number of elements on the stack.
2828
2829							Parameter Description
2830							1 $depth Number of elements required on the stack
2831
2832							=head2 pushElement()
2833
2834							Push the current element on to the stack.
2835
2836
2837							=head2 pushEmpty()
2838
2839							Push the empty element on to the stack.
2840
2841
2842							=head2 lexicalNameFromLetter($l)
2843
2844							Lexical name for a lexical item described by its letter.
2845
2846							Parameter Description
2847							1 $l Letter of the lexical item
2848
2849							=head2 lexicalNumberFromLetter($l)
2850
2851							Lexical number for a lexical item described by its letter.
2852
2853							Parameter Description
2854							1 $l Letter of the lexical item
2855
2856							=head2 lexicalItemLength($source32, $offset)
2857
2858							Put the length of a lexical item into variable B<size>.
2859
2860							Parameter Description
2861							1 $source32 B<Address> of utf32 source representation
2862							2 $offset B<Offset> to lexical item in utf32
2863
2864							=head2 new($depth, $description)
2865
2866							Create a new term in the parse tree rooted on the stack.
2867
2868							Parameter Description
2869							1 $depth Stack depth to be converted
2870							2 $description Text reason why we are creating a new term
2871
2872							=head2 error($message)
2873
2874							Write an error message and stop.
2875
2876							Parameter Description
2877							1 $message Error message
2878
2879							=head2 testSet($set, $register)
2880
2881							Test a set of items, setting the Zero Flag if one matches else clear the Zero flag.
2882
2883							Parameter Description
2884							1 $set Set of lexical letters
2885							2 $register Register to test
2886
2887							=head2 checkSet($set)
2888
2889							Check that one of a set of items is on the top of the stack or complain if it is not.
2890
2891							Parameter Description
2892							1 $set Set of lexical letters
2893
2894							=head2 reduce($priority)
2895
2896							Convert the longest possible expression on top of the stack into a term at the specified priority.
2897
2898							Parameter Description
2899							1 $priority Priority of the operators to reduce
2900
2901							=head2 reduceMultiple($priority)
2902
2903							Reduce existing operators on the stack.
2904
2905							Parameter Description
2906							1 $priority Priority of the operators to reduce
2907
2908							=head2 accept_a()
2909
2910							Assign.
2911
2912
2913							=head2 accept_b()
2914
2915							Open.
2916
2917
2918							=head2 accept_B()
2919
2920							Closing parenthesis.
2921
2922
2923							=head2 accept_d()
2924
2925							Infix but not assign or semi-colon.
2926
2927
2928							=head2 accept_p()
2929
2930							Prefix.
2931
2932
2933							=head2 accept_q()
2934
2935							Post fix.
2936
2937
2938							=head2 accept_s()
2939
2940							Semi colon.
2941
2942
2943							=head2 accept_v()
2944
2945							Variable.
2946
2947
2948							=head2 parseExpression()
2949
2950							Parse the string of classified lexical items addressed by register $start of length $length. The resulting parse tree (if any) is returned in r15.
2951
2952
2953							=head2 MatchBrackets(@parameters)
2954
2955							Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
2956
2957							Parameter Description
2958							1 @parameters Parameters
2959
2960							=head2 ClassifyNewLines(@parameters)
2961
2962							Scan input string looking for opportunities to convert new lines into semi colons.
2963
2964							Parameter Description
2965							1 @parameters Parameters
2966
2967							=head2 ClassifyWhiteSpace(@parameters)
2968
2969							Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
2970
2971							Parameter Description
2972							1 @parameters Parameters
2973
2974							=head2 reload($parse, $parameters)
2975
2976							Reload the variables associated with a parse.
2977
2978							Parameter Description
2979							1 $parse Parse
2980							2 $parameters Hash of variable parameters
2981
2982							=head2 parseUtf8($parse, @parameters)
2983
2984							Parse a unisyn expression encoded as utf8 and return the parse tree.
2985
2986							Parameter Description
2987							1 $parse Parse
2988							2 @parameters Parameters
2989
2990							=head2 printLexicalItem($parse, $source32, $offset, $size)
2991
2992							Print the utf8 string corresponding to a lexical item at a variable offset.
2993
2994							Parameter Description
2995							1 $parse Parse tree
2996							2 $source32 B<Address> of utf32 source representation
2997							3 $offset B<Offset> to lexical item in utf32
2998							4 $size B<Size> in utf32 chars of item
2999
3000							=head2 showAlphabet($alphabet)
3001
3002							Show an alphabet.
3003
3004							Parameter Description
3005							1 $alphabet Alphabet name
3006
3007							=head2 T($key, $expected, %options)
3008
3009							Parse some text and dump the results.
3010
3011							Parameter Description
3012							1 $key Key of text to be parsed
3013							2 $expected Expected result
3014							3 %options Options
3015
3016							=head2 C($key, $expected, %options)
3017
3018							Parse some text and print the results.
3019
3020							Parameter Description
3021							1 $key Key of text to be parsed
3022							2 $expected Expected result
3023							3 %options Options
3024
3025
3026							=head1 Index
3027
3028
3029							1 L - Assign.
3030
3031							2 L - Closing parenthesis.
3032
3033							3 L - Open.
3034
3035							4 L - Infix but not assign or semi-colon.
3036
3037							5 L - Prefix.
3038
3039							6 L - Post fix.
3040
3041							7 L - Semi colon.
3042
3043							8 L - Variable.
3044
3045							9 L - Define a method for ascii text.
3046
3047							10 L - Translate ascii to the corresponding letters in the assign greek alphabet.
3048
3049							11 L - Translate ascii to the corresponding letters in the assign latin alphabet.
3050
3051							12 L - Translate ascii to the corresponding letters in the dyad greek alphabet.
3052
3053							13 L - Translate ascii to the corresponding letters in the dyad latin alphabet.
3054
3055							14 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3056
3057							15 L - Translate ascii to the corresponding letters in the prefix greek alphabet.
3058
3059							16 L - Translate ascii to the corresponding letters in the prefix latin alphabet.
3060
3061							17 L - Translate ascii to the corresponding letters in the suffix greek alphabet.
3062
3063							18 L - Translate ascii to the corresponding letters in the suffix latin alphabet.
3064
3065							19 L - Translate ascii to the corresponding letters in the variable greek alphabet.
3066
3067							20 L - Translate ascii to the corresponding letters in the variable latin alphabet.
3068
3069							21 L - Define a method for an assign operator.
3070
3071							22 L - Define a method for a bracket operator.
3072
3073							23 L - Parse some text and print the results.
3074
3075							24 L - Check that one of a set of items is on the top of the stack or complain if it is not.
3076
3077							25 L - Check that we have at least the specified number of elements on the stack.
3078
3079							26 L - Scan input string looking for opportunities to convert new lines into semi colons.
3080
3081							27 L - Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
3082
3083							28 L - Create a new unisyn parse from a utf8 string.
3084
3085							29 L - Dump the parse tree.
3086
3087							30 L - Define a method for a dyadic operator.
3088
3089							31 L - Write an error message and stop.
3090
3091							32 L - Print the operator calling sequence.
3092
3093							33 L - Load the position of a lexical item in its alphabet from the current character.
3094
3095							34 L - Load the lexical code of the current character in memory into the specified register.
3096
3097							35 L - Put the length of a lexical item into variable size.
3098
3099							36 L - Lexical name for a lexical item described by its letter.
3100
3101							37 L - Lexical number for a lexical item described by its letter.
3102
3103							38 L - Map a lexical item to a processing subroutine.
3104
3105							39 L - Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
3106
3107							40 L - Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket.
3108
3109							41 L - Create a new term in the parse tree rooted on the stack.
3110
3111							42 L - Parse the string of classified lexical items addressed by register $start of length $length.
3112
3113							43 L - Parse a unisyn expression encoded as utf8 and return the parse tree.
3114
3115							44 L - Define a method for a prefix operator.
3116
3117							45 L - Print a parse tree.
3118
3119							46 L - Print the utf8 string corresponding to a lexical item at a variable offset.
3120
3121							47 L - Print the operator calling sequence.
3122
3123							48 L - Push the current element on to the stack.
3124
3125							49 L - Push the empty element on to the stack.
3126
3127							50 L - Put the specified lexical code into the current character in memory.
3128
3129							51 L - Convert the longest possible expression on top of the stack into a term at the specified priority.
3130
3131							52 L - Reduce existing operators on the stack.
3132
3133							53 L - Reload the variables associated with a parse.
3134
3135							54 L - Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
3136
3137							55 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3138
3139							56 L - Show an alphabet.
3140
3141							57 L - Define a method for a suffix operator.
3142
3143							58 L - Parse some text and dump the results.
3144
3145							59 L - Test a set of items, setting the Zero Flag if one matches else clearing the Zero Flag.
3146
3147							60 L - Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
3148
3149							61 L - Define a method for a variable.
3150
3151							=head1 Installation
3152
3153							This module is written in 100% Pure Perl and, thus, it is easy to read,
3154							comprehend, use, modify and install via cpan:
3155
3156							sudo cpan install Unisyn::Parse
3157
3158							=head1 Author
3159
3160							L
3161
3162							L
3163
3164							=head1 Copyright
3165
3166							Copyright (c) 2016-2021 Philip R Brenan.
3167
3168							This module is free software. It may be used, redistributed and/or modified
3169							under the same terms as Perl itself.
3170
3171							=cut
3172
3173
3174
3175							# Tests and documentation
3176
3177							sub test # Execute the Perl text held in the DATA section of this package, if there is any.
3178	1			1	0	6	{my $p = __PACKAGE__; # Package name used to address the DATA file handle of this package
3179	1					8	binmode($_, ":utf8") for STDOUT, STDERR; # Apply the utf8 layer to standard output and standard error
3180	1	50				62	return if eval "eof(${p}::DATA)"; # Nothing to execute if the DATA file handle is already at end of file
3181	1					52	my $s = eval "join('', <${p}::DATA>)"; # Read all the remaining lines of the DATA section into one string
3182	1	50				22	$@ and die $@; # Propagate any error raised while reading the DATA section
3183	1	0		1	1	6	eval $s; # Execute the text read from the DATA section as Perl code
	1			1	1	2
	1			0	1	6
	1			0	1	633
	1			0		58283
	1			0		7
	1			0		126
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
3184	0	0					$@ and die $@; # Propagate any error raised by the executed text
3185	0						1 # Return true once the text has been executed without error
3186							}
3187
3188							test unless caller; # Call test() only when there is no caller, that is when this file is being run directly

3190							1; # True value returned to whatever loads this file
3191							# podDocumentation
3192							__DATA__