File Coverage

blib/lib/Unisyn/Parse.pm
Criterion Covered Total %
statement 49 1143 4.2
branch 2 100 2.0
condition 0 9 0.0
subroutine 15 104 14.4
pod 62 64 96.8
total 128 1420 9.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/NasmX86/lib/ -I/home/phil/perl/cpan/AsmC/lib/
2             #-------------------------------------------------------------------------------
3             # Parse a Unisyn expression.
4             # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             # Finished in 13.14s, bytes: 2,655,008, execs: 465,858
8             # Can we remove more Pushr by doing one big save in parseutf8 ?
9             package Unisyn::Parse;
10             our $VERSION = "20211013";
11 1     1   2788 use warnings FATAL => qw(all);
  1         8  
  1         47  
12 1     1   6 use strict;
  1         2  
  1         38  
13 1     1   6 use Carp qw(confess cluck);
  1         2  
  1         93  
14 1     1   528 use Data::Dump qw(dump);
  1         7920  
  1         68  
15 1     1   4025 use Data::Table::Text qw(:all !parse);
  1         144825  
  1         1866  
16 1     1   6691 use Nasm::X86 qw(:all);
  1         172742  
  1         3087  
17 1     1   14 use feature qw(say current_sub);
  1         3  
  1         108  
18 1     1   7 use utf8;
  1         3  
  1         8  
19              
20             makeDieConfess; # Imported helper defined outside this file - presumably makes die report like confess
21              
22             my $develop = -e q(/home/phil/); # True if the developer's home folder exists, i.e. we are developing
23             our $Parse; # The parse currently being built. Only one parse is performed at a time, which makes it safe to globalize this variable. The alternative is to pass this variable between all the parsing calls which would obscure their workings greatly.
24             our $ParseUtf8SubDef; # The definition of the subroutine that does the parsing, kept so that its parameters can be reused by the subroutine generated in new().
25             our $debug = 0; # Emit code to print the evolution of the stack on stderr if true.
26              
27             #D1 Create # Create a Unisyn parse of a utf8 string.
28              
29             sub create($%) # Create a new Unisyn parse from a utf8 string and return its description.
30 0     0 1 0 {my ($address, %options) = @_; # Address of a zero terminated utf8 source string to parse as a variable, parse options of which only 'operators' is read here
31 0 0       0 @_ >= 1 or confess "One or more parameters"; # The source address is required
32              
33 0         0 my $a = CreateArena; # Arena to hold parse tree - every parse tree gets its own arena so that we can free parses separately
34 0         0 my $size = StringLength string => $address; # Length of input utf8
35              
36 0         0 my $p = $Parse = genHash(__PACKAGE__, # Description of parse, also saved in the global $Parse
37             arena => $a, # Arena containing tree
38             size8 => $size, # Size of source string as utf8
39             address8 => $address, # Address of source string as utf8
40             source32 => V(source32), # Source text as utf32
41             sourceSize32 => V(sourceSize32), # Size of utf32 allocation
42             sourceLength32 => V(sourceLength32), # Length of utf32 string
43             parse => V('parse'), # Offset to the head of the parse tree
44             fails => V('fail'), # Number of failures encountered in this parse
45             quarks => $a->CreateQuarks, # Quarks representing the strings used in this parse
46             operators => undef, # Methods implementing each lexical operator - only set if the 'operators' option is supplied
47             width => RegisterSize(eax), # Size of entries in exec chain
48             );
49              
50 0 0       0 if (my $o = $options{operators}) # Operator methods for lexical items
51 0         0 {$p->operators = $a->CreateQuarks; # Create quark set to translate operator names to offsets
52 0         0 $o->($p); # Call the caller supplied sub with the parse description
53             }
54              
55 0         0 $p->parseUtf8; # Parse utf8 source string
56              
57 0         0 $p # Return the description of the parse
58             }
59              
60             #D1 Parse # Parse Unisyn expressions
61              
62             our $Lex = &lexicalData; # Lexical table definitions
63              
64             our $ses = RegisterSize rax; # Size of an element on the stack
65             our ($w1, $w2, $w3) = (r8, r9, r10); # Work registers
66             our $prevChar = r11; # The previous character parsed
67             our $index = r12; # Index of current element
68             our $element = r13; # Contains the item being parsed
69             our $start = r14; # Start of the parse string
70             our $size = r15; # Length of the input string
71             our $parseStackBase = rsi; # The base of the parsing stack in the stack
72             #ur $arenaReg = rax; # The arena in which we are building the parse tree
73             our $indexScale = 4; # The size of a utf32 character
74             our $lexCodeOffset = 3; # The offset in a classified character to the lexical code.
75             our $bitsPerByte = 8; # The number of bits in a byte
76              
77             our $Ascii = $$Lex{lexicals}{Ascii} {number}; # Ascii
78             our $assign = $$Lex{lexicals}{assign} {number}; # Assign
79             our $dyad = $$Lex{lexicals}{dyad} {number}; # Dyad
80             our $CloseBracket = $$Lex{lexicals}{CloseBracket} {number}; # Close bracket
81             our $empty = $$Lex{lexicals}{empty} {number}; # Empty element
82             our $NewLineSemiColon = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semicolon
83             our $OpenBracket = $$Lex{lexicals}{OpenBracket} {number}; # Open bracket
84             our $prefix = $$Lex{lexicals}{prefix} {number}; # Prefix operator
85             our $semiColon = $$Lex{lexicals}{semiColon} {number}; # Semicolon
86             our $suffix = $$Lex{lexicals}{suffix} {number}; # Suffix
87             our $term = $$Lex{lexicals}{term} {number}; # Term
88             our $variable = $$Lex{lexicals}{variable} {number}; # Variable
89             our $WhiteSpace = $$Lex{lexicals}{WhiteSpace} {number}; # White space
90             our $firstSet = $$Lex{structure}{first}; # First symbols allowed
91             our $lastSet = $$Lex{structure}{last}; # Last symbols allowed
92             our $bracketsBase = $$Lex{bracketsBase}; # Base lexical item for brackets
93              
94             our $asciiNewLine = ord("\n"); # New line in ascii
95             our $asciiSpace = ord(' '); # Space in ascii
96              
97             # Operator description
98             our $opType = 0; # Operator type field - currently always a term
99             our $opCount = 1; # Number of operands for this operator
100             our $opSub = 2; # Offset of sub associated with this lexical item
101             our $opChain = 3; # The execution chain produced by traversing the parse tree in post order.
102              
103             # Lexical item description
104             our $lexItemType = 0; # Field number of lexical item type in the description of a lexical item
105             our $lexItemOffset = 1; # Field number of the offset in the utf32 source of the lexical item in the description of a lexical item or - if this is a term - the offset of the invariant first block of the sub tree
106             our $lexItemLength = 2; # Field number of the length of the lexical item in the utf32 source in the description of a lexical item
107             our $lexItemQuark = 3; # Field number of the quark containing the text of this lexical item.
108             our $lexItemWidth = 4; # The number of fields used to describe a lexical item in the parse tree
109              
110             # Execution chain
111             our $execChainNext = 0; # Next block offset
112             our $execChainTerm = 1; # Corresponding term offset
113             our $execChainSub = 2; # Offset of sub associated with term
114              
115             sub getAlpha($$$) #P Load the position of a lexical item in its alphabet from the current character.
116 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
117 0         0 Mov $register, "[$address+$indexScale*$index]"; # Load from the start of the indexed utf32 element - no lexical code offset is applied here
118             }
119              
120             sub getLexicalCode($$$) #P Load the lexical code of the current character in memory into the specified register.
121 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
122 0         0 Mov $register, "[$address+$indexScale*$index+$lexCodeOffset]"; # Load the lexical code held at $lexCodeOffset within the indexed element
123             }
124              
125             sub putLexicalCode($$$$) #P Put the specified lexical code into the current character in memory.
126 0     0 1 0 {my ($register, $address, $index, $code) = @_; # Register used to load code, address of string, index into string, code to put
127 0         0 Mov $register, $code; # Stage the code in the supplied register
128 0         0 Mov "[$address+$indexScale*$index+$lexCodeOffset]", $register; # Save lexical code at $lexCodeOffset within the indexed element
129             }
130              
131             sub loadCurrentChar() #P Load the details of the character currently being processed so that we have the index of the character in the upper half of the element register and the lexical type of the character in its lowest byte.
132 0     0 1 0 {my $r = $element."b"; # Classification byte
133              
134 0         0 Mov $element, $index; # Load index of character as upper dword
135 0         0 Shl $element, $indexScale * $bitsPerByte; # Shift the index of the character into the upper half of the register so that we know where the character came from.
136 0         0 getLexicalCode $r, $start, $index; # Load lexical classification as lowest byte
137              
138 0         0 Cmp $r, $bracketsBase; # Brackets, due to their frequency, start after 0x10 with open even and close odd
139             IfGe # Brackets
140             Then
141 0     0   0 {And $r, 1 # Bracket: 0 - open, 1 - close
142             },
143             Else
144 0     0   0 {Cmp $r, $Ascii; # Ascii is a type of variable
145             IfEq
146             Then
147 0         0 {Mov $r, $variable;
148             },
149             Else
150 0         0 {Cmp $r, $NewLineSemiColon; # New line semicolon is a type of semi colon
151             IfEq
152             Then
153 0         0 {Mov $r, $semiColon;
154 0         0 };
155 0         0 };
156 0         0 };
157             }
158              
159             sub checkStackHas($) #P Compare the number of bytes on the parse stack with the size of the specified number of elements, leaving the flags for the caller to test.
160 0     0 1 0 {my ($depth) = @_; # Number of elements required on the stack
161 0         0 Mov $w1, $parseStackBase; # Base of the parse stack
162 0         0 Sub $w1, rsp; # Bytes currently on the parse stack
163 0         0 Cmp $w1, $ses * $depth; # Greater or equal means at least $depth elements are present
164             }
165              
166             sub pushElement() #P Push the current element on to the stack.
167 0     0 1 0 {Push $element;
168 0 0       0 if ($debug) # Trace the push on stderr
169 0         0 {PrintErrStringNL "Push Element:";
170 0         0 PrintErrRegisterInHex $element;
171             }
172             }
173              
174             sub pushEmpty() #P Push the empty element on to the stack.
175 0     0 1 0 {Mov $w1, $index; # Index of the current character
176 0         0 Shl $w1, $indexScale * $bitsPerByte; # Move the index into the upper half as in loadCurrentChar
177 0         0 Or $w1, $empty; # Mark as an empty element
178 0         0 Push $w1;
179 0 0       0 if ($debug) # Trace the push on stderr
180 0         0 {PrintErrStringNL "Push Empty";
181             }
182             }
183              
184             sub lexicalNameFromLetter($) #P Lexical name for a lexical item described by its letter.
185 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
186 0         0 my %l = $Lex->{treeTermLexicals}->%*; # Copy of the table of lexical items keyed by letter
187 0         0 my $n = $l{$l}; # Entry for this letter
188 0 0       0 confess "No such lexical: $l" unless $n; # Complain about an unknown letter
189             $n->{short} # Return the 'short' field of the entry
190 0         0 }
191              
192             sub lexicalNumberFromLetter($) #P Lexical number for a lexical item described by its letter.
193 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
194 0         0 my $n = lexicalNameFromLetter $l; # Name of the lexical item
195 0         0 my $N = $Lex->{lexicals}{$n}{number}; # Number of the lexical item with that name
196 0 0       0 confess "No such lexical named: $n" unless defined $N; # Zero is a valid number so test for definedness
197 0         0 $N # Return the lexical number
198             }
199              
200             sub lexicalItemLength($$) #P Return a variable containing the length of a lexical item.
201 0     0 1 0 {my ($source32, $offset) = @_; # Address of the utf32 source representation, offset to the lexical item in utf32. The following line appears to be the tail of the original comment split off during text extraction
of utf32 source representation, B to lexical item in utf32
202              
203             my $s = Subroutine
204 0     0   0 {my ($p, $s) = @_; # Parameters
205             # PushR r14, r15; # We do not need to save the zmm and mask registers because they are only used as temporary work registers and they have been saved by the caller (original cross reference lost in extraction - presumably parseUtf8)
206              
207 0         0 $$p{source32}->setReg(r14); # Address of the source
208 0         0 $$p{offset} ->setReg(r15); # Offset of the lexical item in utf32 characters
209 0         0 Vmovdqu8 zmm0, "[r14+4*r15]"; # Load source to examine
210 0         0 Pextrw r15, xmm0, 1; # Extract lexical type of first element. NOTE(review): word 1 spans bytes 2-3 of the element whereas getLexicalCode reads byte 3 only - confirm the compares below see the intended value
211              
212             OrBlock # The size of a bracket or a semi colon is always 1
213 0         0 {my ($pass, $end, $start) = @_;
214 0         0 Cmp r15, $OpenBracket;
215 0         0 Je $pass;
216 0         0 Cmp r15, $CloseBracket;
217 0         0 Je $pass;
218 0         0 Cmp r15, $semiColon;
219 0         0 Je $pass;
220              
221 0         0 Vpbroadcastw zmm1, r15w; # Broadcast lexical type
222 0         0 Vpcmpeqw k0, zmm0, zmm1; # Check extent of first lexical item up to 16
223 0         0 Mov r15, 0x55555555; # Mask with the even numbered bits set: these are the low words of each element, which are not compared as lexical types
224 0         0 Kmovq k1, r15;
225 0         0 Korq k2, k0, k1; # Force the even positions to one so that only a mismatching high word can end the run
226              
227 0         0 Kmovq r15, k2;
228 0         0 Not r15; # Swap zeroes and ones
229 0         0 Tzcnt r15, r15; # Trailing zero count is a factor two too big
230 0         0 Shr r15, 1; # Normalized count of number of characters in lexical item
231 0         0 $$p{size}->getReg(r15); # Save size in supplied variable
232             }
233             Pass # Show unitary length
234 0         0 {my ($end, $pass, $start) = @_;
235 0         0 $$p{size}->getConst(1); # Save size in supplied variable
236 0         0 };
237              
238             # PopR;
239 0         0 } [qw(offset source32 size)],
240             name => q(Unisyn::Parse::lexicalItemLength);
241              
242 0         0 $s->call(offset => $offset, source32 => $source32, my $size = V(size)); # The size is returned through a new variable
243              
244 0         0 $size # Return the variable holding the length
245             }
246              
247             sub new($$) #P Create a new term in the parse tree rooted on the stack.
248 0     0 1 0 {my ($depth, $description) = @_; # Stack depth to be converted, text reason why we are creating a new term
249              
250 0         0 my $wr = RegisterSize rax; # Width of general purpose register
251              
252             my $s = Subroutine
253 0     0   0 {my ($locals) = @_; # Parameters
254 0         0 my $a = DescribeArena $$locals{bs}; # Address arena
255              
256             my $quarks = $Parse->quarks->reload(arena => $$locals{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
257             array => $$locals{numbersToStringsFirst},
258 0         0 tree => $$locals{stringsToNumbersFirst});
259              
260             my $operators = $Parse->operators ? $Parse->operators->reload # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
261             (arena => $$locals{bs},
262             array => $$locals{opNumbersToStringsFirst},
263 0 0       0 tree => $$locals{opStringsToNumbersFirst}) : undef;
264              
265 0         0 my $t = $a->CreateTree; # Create a tree in the arena to hold the details of the lexical elements on the stack
266 0         0 my $o = V(offset); # Offset into source for lexical item
267 0         0 $t->insert(V(key, $opType), K(data, $term)); # Create a term - we only have terms at the moment in the parse tree - but that might change in the future
268 0         0 $t->insert(V(key, $opCount), K(data, $depth)); # The number of elements in the term which is the number of operands for the operator
269              
270 0         0 my $liOnStack = $w1; # The lexical item as it appears on the stack
271 0         0 my $liType = $w2; # The lexical item type
272 0         0 my $liOffset = $w3; # The lexical item offset in the source
273              
274 0         0 PushR zmm0; # Put the simulated stack on the stack
275              
276 0         0 for my $i(1..$depth) # Each term
277 0         0 {my $j = $depth + 1 - $i; # Field group number in the tree - counts down as we move up the simulated stack
278 0         0 my $k = ($i - 1) * $wr; # Position in simulated stack
279 0         0 Mov $liOnStack, "[rsp+$k]"; # Copy term out of simulated stack
280 0 0       0 PrintErrRegisterInHex $liOnStack if $debug;
281              
282 0         0 Mov $liOffset, $liOnStack; # Offset of either the text in the source or the offset of the first block of the tree describing a term
283 0         0 Shr $liOffset, 32; # Offset in source: either the actual text or the offset of the first block of the tree containing a term shifted over to look as if it were an offset in the source
284 0         0 $o->getReg($liOffset); # Offset of lexical item in source or offset of first block in tree describing a term
285              
286 0         0 ClearRegisters $liType;
287 0         0 Mov $liType."b", $liOnStack."b"; # The lexical item type in the lowest byte, the rest clear.
288              
289 0         0 Cmp $liType, $term; # Check whether the lexical item on the stack is a term
290             IfEq # Insert a sub tree if we are inserting a term
291             Then
292 0         0 {$t->insertTree(K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset of first block in the tree representing the term
293             },
294             Else # Insert the offset in the utf32 source if we are not on a term
295 0         0 {$t->insert (K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset in source of non term
296 0         0 };
297              
298 0         0 Cmp $liType, $variable; # Check whether the lexical item is a variable which can also represent ascii
299             IfEq # A variable might really be ascii so reload the actual lexical type from the source
300             Then
301 0         0 {Mov $liType."b", "[$start+4*$liOffset+3]"; # Load lexical type from source
302 0         0 };
303              
304 0         0 Cmp $liType, $term; # Length of lexical item that is not a term
305             IfNe
306             Then # Not a term
307 0         0 {my $size = lexicalItemLength(V(address, $start), $o); # Get the size of the lexical item at the offset indicated on the stack
308 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemLength), $size); # Save size of lexical item in parse tree
309              
310 0         0 my $s = CreateShortString(1); # Short string to hold text of lexical item so we can load it into a quark
311 0         0 $s->clear; # Perhaps not strictly necessary but easier to debug
312 0         0 PushR r15; # Probably not needed as saved by the caller (original cross reference lost in extraction)
313 0 0 0     0 r15 ne $start && r15 ne $liOffset or confess "r15 in use"; # r15 is about to be overwritten so it must not be one of the registers read below
314 0         0 Lea r15, "[$start+4*$liOffset]"; # Start address of lexical item
315 0         0 my $startAddress = V(address, r15); # Save start address of lexical item
316 0         0 PopR;
317              
318 0         0 Cmp $liType, $OpenBracket; # Is it a bracket ?
319             IfEq
320             Then
321 0         0 {ClearRegisters $liType; # Compute lexical type of bracket by adding bracket number to the start of the bracket range
322 0         0 Mov $liType."b", "[$start+4*$liOffset+3]"; # Load bracket number
323 0         0 Shl $liType, 16; # Shift bracket base into position
324 0         0 Add $liType, 2; # Set length of short string as two = (lexical type, bracket number)
325 0         0 Pinsrd "xmm1", $liType."d", 0; # Load short string
326 0         0 Shr $liType, 16; # Move lexical type back into position for insertion into the parse tree
327             },
328             Else
329 0         0 {$s->loadDwordBytes(0, $startAddress, $size, 1); # Load text of lexical item into short string leaving space for lexical type
330 0         0 Pinsrb "xmm1", $liType."b", 1; # Set lexical type as the first byte of the short string
331 0         0 };
332              
333 0         0 my $q = $quarks->quarkFromShortString($s); # Find the quark matching the lexical item if there is such a quark
334 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemQuark), $q); # Save quark number of lexical item in parse tree
335 0 0       0 if ($operators) # The parse has operator definitions
336 0 0       0 {if ($j == 1) # The operator quark is always first
337             {OrBlock # Like an operator or like a variable?
338 0         0 {my ($pass, $end, $start) = @_;
339 0         0 Cmp $liType, $variable;
340 0         0 Je $pass; # Process a variable
341 0         0 Cmp $liType, $Ascii;
342 0         0 Je $pass; # Process ascii constant
343 0         0 Cmp $liType, $semiColon;
344 0         0 Je $pass; # Process Semicolon
345 0         0 Cmp $liType, $NewLineSemiColon;
346 0         0 Je $pass; # Process new line semicolon
347             # Process non variable, i.e. operators specifically
348 0         0 my $N = $operators->subFromQuarkViaQuarks($quarks, $q); # Look up the subroutine associated with this operator
349             If $N >= 0, # Found a matching operator subroutine
350             Then
351 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
352 0         0 };
353             }
354             Pass # Process variables in general or items based on variables using a short string of length 1 being the lexical type of the item in question
355 0         0 {Shl $liType, 8; # Move lexical type into second byte
356 0         0 Inc $liType; # Show length
357 0         0 Pinsrq "xmm1", $liType, 0; # Load short string
358 0         0 my $N = $operators->subFromShortString($s); # Address of sub to process variable or ascii or semicolon
359 0         0 Shr $liType, 8; # Restore lexical type
360             If $N >= 0, # Found a matching operator subroutine
361             Then
362 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
363 0         0 };
364 0         0 };
365             }
366             }
367 0         0 };
368              
369 0         0 $t->insert (V(key, $lexItemWidth * $j + $lexItemType), # Save lexical type in parse tree
370             V(data)->getReg($liType));
371             }
372             # Push new term onto the stack in place of the items popped off
373 0         0 $t->first->setReg($liOffset); # Offset of new term tree
374 0         0 Shl $liOffset, 32; # Push offset to term tree into the upper dword to make it look like a source offset
375 0         0 Or $liOffset."b", $term; # Mark as a term tree
376 0         0 $$locals{new}->getReg($liOffset); # New term comprised of a tree of old terms
377 0         0 PopR; # Restore stack to its position at the start
378             }
379 0         0 [qw(new)], with => $ParseUtf8SubDef,
380             # [qw(bs new
381             # numbersToStringsFirst stringsToNumbersFirst
382             # opNumbersToStringsFirst opStringsToNumbersFirst
383             # )],
384             name=>"Unisyn::Parse::new_$depth";
385              
386 0 0       0 PrintErrStringNL "New: $description" if $debug;
387              
388 0 0       0 if ($depth == 1) {Mov $w1, 1} # Copy the top of the real stack which holds the parse state to zmm0 so that we can adjust the stack to call the subroutine generated above
  0 0       0  
389 0         0 elsif ($depth == 2) {Mov $w1, 3} # Mask bits select one quadword per stack element
390 0         0 else {Mov $w1, 7} # Any other depth copies three elements
391 0         0 Kmovq k1, $w1; # Mask selecting the elements to copy - k1 is presumably saved by the caller (original cross reference lost in extraction)
392 0         0 Vmovdqu64 "zmm0{k1}", "[rsp]"; # Copy top lexical items on stack
393              
394             # $s->call(bs => $Parse->arena->bs, my $new = V('new'),
395             # numbersToStringsFirst => $Parse->quarks->numbersToStrings->first,
396             # stringsToNumbersFirst => $Parse->quarks->stringsToNumbers->first,
397             # opNumbersToStringsFirst => $Parse->operators ? $Parse->operators->subQuarks->numbersToStrings->first : 0,
398             # opStringsToNumbersFirst => $Parse->operators ? $Parse->operators->subQuarks->stringsToNumbers->first : 0,
399             # );
400              
401 0         0 $s->call(my $new = V('new')); # The remaining parameters come from $ParseUtf8SubDef via 'with' above
402              
403 0         0 $new->setReg($w1); # Save offset of new term in a work register
404 0         0 Add rsp, $depth * $wr; # Remove input terms from stack
405 0         0 Push $w1; # Save new term on stack
406             }
407              
408             sub error($) #P Write an error message, the current element and the current index to stdout and stop.
409 0     0 1 0 {my ($message) = @_; # Error message
410 0         0 PrintOutStringNL "Error: $message";
411 0         0 PrintOutString "Element: ";
412 0         0 PrintOutRegisterInHex $element;
413 0         0 PrintOutString "Index : ";
414 0         0 PrintOutRegisterInHex $index;
415 0         0 Exit(0); # NOTE(review): the argument is presumably the process exit code, in which case a parse error exits with success - confirm this is intended
416             }
417              
418             sub testSet($$) #P Test a set of items, setting the Zero Flag if one matches else clearing the Zero flag.
419 0     0 1 0 {my ($set, $register) = @_; # Set of lexical letters, Register to test
420 0         0 my @n = map {sprintf("0x%x", lexicalNumberFromLetter $_)} split //, $set; # Each lexical item by number from letter, formatted as a hex constant
  0         0  
421 0         0 my $end = Label;
422 0         0 for my $n(@n) # Compare the low byte of the register with each lexical number in turn
423 0         0 {Cmp $register."b", $n;
424 0         0 Je $end # Matched: leave with the Zero Flag set
425             }
426 0         0 ClearZF; # Nothing matched
427 0         0 SetLabel $end;
428             }
429              
430             sub checkSet($) #P Check that one of a set of items is on the top of the stack or complain if it is not.
431 0     0 1 0 {my ($set) = @_; # Set of lexical letters
432 0         0 my @n = map {lexicalNumberFromLetter $_} split //, $set; # Each lexical item by number from letter
  0         0  
433 0         0 my $end = Label;
434              
435 0         0 for my $n(@n) # Compare the low byte of the top of the stack with each lexical number in turn
436 0         0 {Cmp "byte[rsp]", $n;
437 0         0 Je $end # Matched: leave with the Zero Flag set
438             }
439 0         0 error("Expected one of: '$set' on the stack"); # Nothing matched: the emitted code prints the message and calls Exit
440 0         0 ClearZF; # Only reached if the code emitted by error returns
441 0         0 SetLabel $end;
442             }
443              
444             sub reduce($) #P Convert the longest possible expression on top of the stack into a term at the specified priority, setting the Zero Flag if a reduction was made else clearing it.
445 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
446 0         0 $priority =~ m(\A(1|3)\Z); # NOTE(review): the result of this match is discarded so the priority is not validated, and the pattern accepts 1 or 3 while the callers in this file pass 1 or 2
447 0         0 my ($success, $end) = map {Label} 1..2; # Exit points
  0         0  
448              
449 0         0 checkStackHas 3; # At least three elements on the stack
450             IfGe
451             Then
452 0     0   0 {my ($l, $d, $r) = ($w1, $w2, $w3);
453 0         0 Mov $l, "[rsp+".(2*$ses)."]"; # Top 3 elements on the stack
454 0         0 Mov $d, "[rsp+".(1*$ses)."]";
455 0         0 Mov $r, "[rsp+".(0*$ses)."]";
456              
457 0 0       0 if ($debug)
458 0         0 {PrintErrStringNL "Reduce 3:";
459 0         0 PrintErrRegisterInHex $l, $d, $r;
460             }
461              
462 0         0 testSet("t", $l); # Parse out infix operator expression
463             IfEq
464             Then
465 0         0 {testSet("t", $r);
466             IfEq
467             Then
468 0 0       0 {testSet($priority == 1 ? "ads" : 'd', $d); # Priority 1 reduces assign, dyad and semicolon, any other priority reduces dyads only
469             IfEq
470             Then
471 0         0 {Add rsp, 3 * $ses; # Reorder into polish notation
472 0         0 Push $_ for $d, $l, $r;
473 0         0 new(3, "Term infix term");
474 0         0 Jmp $success;
475 0         0 };
476 0         0 };
477 0         0 };
478              
479 0         0 testSet("b", $l); # Parse parenthesized term
480             IfEq
481             Then
482 0         0 {testSet("B", $r);
483             IfEq
484             Then
485 0         0 {testSet("t", $d);
486             IfEq
487             Then
488 0         0 {Add rsp, $ses; # Pop the close bracket leaving the term on top of the open bracket
489 0         0 new(1, "Bracketed term");
490 0         0 new(2, "Brackets for term");
491 0 0       0 PrintErrStringNL "Reduce by ( term )" if $debug;
492 0         0 Jmp $success;
493 0         0 };
494 0         0 };
495 0         0 };
496 0         0 };
497              
498 0         0 checkStackHas 2; # At least two elements on the stack
499             IfGe # Convert an empty pair of parentheses to an empty term
500             Then
501 0     0   0 {my ($l, $r) = ($w1, $w2);
502              
503 0 0       0 if ($debug) # NOTE(review): this trace is emitted before the two registers are loaded below
504 0         0 {PrintErrStringNL "Reduce 2:";
505 0         0 PrintErrRegisterInHex $l, $r;
506             }
507              
508             # KeepFree $l, $r; # Why ?
509 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # Top 2 elements on the stack
510 0         0 Mov $r, "[rsp+".(0*$ses)."]";
511 0         0 testSet("b", $l); # Empty pair of parentheses
512             IfEq
513             Then
514 0         0 {testSet("B", $r);
515             IfEq
516             Then
517 0         0 {Add rsp, 2 * $ses; # Pop expression
518 0         0 Push $l; # Bracket as operator
519 0         0 new(1, "Empty brackets");
520 0         0 Jmp $success;
521 0         0 };
522 0         0 };
523 0         0 testSet("s", $l); # Semi-colon, close implies remove unneeded semi
524             IfEq
525             Then
526 0         0 {testSet("B", $r);
527             IfEq
528             Then
529 0         0 {Add rsp, 2 * $ses; # Pop expression
530 0         0 Push $r; # Keep just the close bracket
531 0 0       0 PrintErrStringNL "Reduce by ;)" if $debug;
532 0         0 Jmp $success;
533 0         0 };
534 0         0 };
535 0         0 testSet("p", $l); # Prefix, term
536             IfEq
537             Then
538 0         0 {testSet("t", $r);
539             IfEq
540             Then
541 0         0 {new(2, "Prefix term");
542 0         0 Jmp $success;
543 0         0 };
544 0         0 };
545             # KeepFree $l, $r;
546 0         0 };
547              
548 0         0 ClearZF; # Failed to match anything
549 0         0 Jmp $end;
550              
551 0         0 SetLabel $success; # Successfully matched
552 0         0 SetZF;
553              
554 0         0 SetLabel $end; # End
555             } # reduce
556              
557             sub reduceMultiple($) #P Reduce existing operators on the stack repeatedly until no further reduction is possible.
558 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
559             K('count',99)->for(sub # An improbably high but finite number of reductions
560 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
561 0         0 reduce($priority); # Leaves the Zero Flag set if a reduction was made
562 0         0 Jne $end; # Keep going as long as reductions are possible
563 0         0 });
564             }
565              
566             sub accept_a() #P Accept an assign operator.
567 0     0 1 0 {checkSet("t"); # Must follow a term
568 0         0 reduceMultiple 2; # Any priority other than 1 reduces dyads only
569 0 0       0 PrintErrStringNL "accept a" if $debug;
570 0         0 pushElement;
571             }
572              
573             sub accept_b #P Accept an open bracket.
574 0     0 1 0 {checkSet("abdps"); # Must follow assign, open bracket, dyad, prefix or semicolon
575 0 0       0 PrintErrStringNL "accept b" if $debug;
576 0         0 pushElement;
577             }
578              
579             sub accept_B #P Accept a closing bracket.
580 0     0 1 0 {checkSet("bst"); # Must follow open bracket, semicolon or term
581 0 0       0 PrintErrStringNL "accept B" if $debug;
582 0         0 reduceMultiple 1; # Reduce at priority 1 before pushing the close bracket
583 0         0 pushElement;
584 0         0 reduceMultiple 1; # Reduce again now that the close bracket is on the stack
585 0         0 checkSet("bst"); # Confirm the top of the stack after the reductions
586             }
587              
588             sub accept_d #P Accept an infix operator that is not assign or semi-colon.
589 0     0 1 0 {checkSet("t"); # Must follow a term
590 0 0       0 PrintErrStringNL "accept d" if $debug;
591 0         0 pushElement;
592             }
593              
594             sub accept_p #P Accept a prefix operator by pushing it on to the parse stack.
595 0     0 1 0 {checkSet("abdps"); # Presumably confirms that the previous lexical item is one of 'a', 'b', 'd', 'p' or 's' - checkSet is defined outside this section
596 0 0       0 PrintErrStringNL "accept p" if $debug; # Trace only generated when debugging
597 0         0 pushElement; # Push the current lexical item on to the parse stack
598             }
599              
600             sub accept_q #P Accept a post fix operator: slide it beneath the element on top of the parse stack and combine the two.
601 0     0 1 0 {checkSet("t"); # Presumably confirms that the previous lexical item is a term ('t') - checkSet is defined outside this section
602 0 0       0 PrintErrStringNL "accept q" if $debug; # Trace only generated when debugging
603             IfEq # Post fix operator applied to a term - NOTE(review): relies on the flags left by the preceding generated code, confirm against checkSet
604             Then
605 0     0   0 {Pop $w1; # Temporarily remove the top element from the parse stack
606 0         0 pushElement; # Push the post fix operator in its place
607 0         0 Push $w1; # Put the saved element back on top of the operator
608 0         0 new(2, "Postfix"); # Presumably builds a new term from the top two stack elements - new is defined outside this section
609             }
610 0         0 }
611              
612             sub accept_s #P Accept a semi-colon: insert an empty element if the top of the stack is already a semi-colon, reduce at priority 1, then push the semi-colon.
613 0     0 1 0 {checkSet("bst"); # Presumably confirms that the previous lexical item is one of 'b', 's' or 't' - checkSet is defined outside this section
614 0 0       0 PrintErrStringNL "accept s" if $debug; # Trace only generated when debugging
615 0         0 Mov $w1, "[rsp]"; # Peek at the element on top of the parse stack without removing it
616 0         0 testSet("s", $w1); # Is the top element a semi-colon?
617             IfEq # Insert an empty element between two consecutive semicolons
618             Then
619 0     0   0 {pushEmpty; # Push the empty element
620 0         0 };
621 0         0 reduceMultiple 1; # Reduce whatever can be reduced before stacking the semi-colon
622 0         0 pushElement; # Push the semi-colon on to the parse stack
623             }
624              
625             sub accept_v #P Accept a variable: push it, make it a term and then apply any prefix operators stacked immediately beneath it.
626 0     0 1 0 {checkSet("abdps"); # Presumably confirms that the previous lexical item is one of 'a', 'b', 'd', 'p' or 's' - checkSet is defined outside this section
627 0 0       0 PrintErrStringNL "accept v" if $debug; # Trace only generated when debugging
628 0         0 pushElement; # Push the variable on to the parse stack
629 0         0 new(1, "Variable"); # Presumably converts the single element on top of the stack into a term - new is defined outside this section
630             V(count,99)->for(sub # Reduce prefix operators
631 0     0   0 {my ($index, $start, $next, $end) = @_; # Loop index variable followed by the start, next and end labels of the loop
632 0         0 checkStackHas 2; # Are there at least two elements on the parse stack?
633 0         0 Jl $end; # Fewer than two elements so there cannot be a prefix operator to apply
634 0         0 my ($l, $r) = ($w1, $w2); # Work registers for the two top most stack elements
635 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # Element one below the top of the stack
636 0         0 Mov $r, "[rsp+".(0*$ses)."]"; # Element on top of the stack - loaded but not otherwise tested here
637 0         0 testSet("p", $l); # Is the element below the top a prefix operator?
638 0         0 Jne $end; # Not a prefix operator so there is nothing more to reduce
639 0         0 new(2, "Prefixed variable"); # Presumably combines the prefix operator and the term into a new term
640 0         0 });
641             }
642              
643             sub parseExpression() #P Parse the string of classified lexical items addressed by register $start whose length is held in register $size. The offset of the resulting parse tree (if any) is returned in r15.
644 0     0 1 0 {my $end = Label; # Jump here to finish - an empty expression comes straight here without setting r15
645 0         0 my $eb = $element."b"; # Contains a byte from the item being parsed
646              
647 0         0 Cmp $size, 0; # Check for empty expression
648 0         0 Je $end;
649              
650 0         0 loadCurrentChar; # Load current character
651             ### Need test for ignorable white space as first character
652 0         0 testSet($firstSet, $element); # Is the first lexical item one of those that can start an expression?
653             IfNe
654             Then
655 0     0   0 {error(<<END); # Here document terminated by END below - the opener was truncated to a single '<' in this listing and has been restored
656             Expression must start with 'opening parenthesis', 'prefix
657             operator', 'semi-colon' or 'variable'.
658             END
659 0         0 };
660              
661 0         0 testSet("v", $element); # Single variable
662             IfEq
663             Then
664 0     0   0 {pushElement; # Push the variable
665 0         0 new(1, "accept initial variable"); # Make the variable into a term
666             },
667             Else
668 0     0   0 {testSet("s", $element); # Semi
669             IfEq
670             Then
671 0         0 {pushEmpty; # An expression that starts with a semi-colon gets an empty element in front of the semi-colon
672 0         0 new(1, "accept initial semicolon");
673 0         0 };
674 0         0 pushElement; # Push the first lexical item whatever it is
675 0         0 };
676              
677 0         0 Inc $index; # We have processed the first character above
678 0         0 Mov $prevChar, $element; # Initialize the previous lexical item
679              
680             For # Parse each utf32 character after it has been classified
681 0     0   0 {my ($start, $end, $next) = @_; # Start and end of the classification loop
682 0         0 loadCurrentChar; # Load current character
683              
684 0 0       0 PrintErrRegisterInHex $element if $debug;
685              
686 0         0 Cmp $eb, $WhiteSpace;
687 0         0 Je $next; # Ignore white space
688              
689 0         0 Cmp $eb, 1; # Brackets are singular but everything else can potentially be a plurality
690             IfGt
691             Then
692 0         0 {Cmp $prevChar."b", $eb; # Compare with previous element known not to be white space or a bracket
693 0         0 Je $next # Still inside the same lexical item so move on to the next character
694 0         0 };
695 0         0 Mov $prevChar, $element; # Save element to previous element now we know we are on a different element
696              
697 0         0 for my $l(sort keys $Lex->{lexicals}->%*) # Each possible lexical item after classification
698 0         0 {my $x = $Lex->{lexicals}{$l}{letter};
699 0 0       0 next unless $x; # Skip characters that do not have a letter defined for Tree::Term because the lexical items needed to layout a file of lexical items are folded down to the actual lexical items required to represent the language independent of the textual layout with white space.
700              
701 0         0 my $n = $Lex->{lexicals}{$l}{number};
702 0         0 Comment "Compare to $n for $l";
703 0         0 Cmp $eb, $n; # Generate a test of the current lexical code against this lexical item's number
704              
705             IfEq
706             Then
707 0         0 {if (my $accept = __PACKAGE__->can("accept_$x")) {$accept->()} # Call the accept routine for this letter if there is one. Unlike the string eval this replaces, an error raised inside the accept routine is reported rather than silently discarded
708 0         0 Jmp $next
709 0         0 };
710             }
711 0         0 error("Unexpected lexical item"); # Not selected
712 0         0 } $index, $size;
713              
714 0         0 testSet($lastSet, $prevChar); # Last lexical element
715             IfNe # Incomplete expression
716             Then
717 0     0   0 {error("Incomplete expression");
718 0         0 };
719              
720             K('count', 99)->for(sub # Remove trailing semicolons if present
721 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
722 0         0 checkStackHas 2;
723 0         0 Jl $end; # Does not have two or more elements
724 0         0 Pop $w1;
725 0         0 testSet("s", $w1); # Check that the top most element is a semi colon
726             IfNe # Not a semi colon so put it back and finish the loop
727             Then
728 0         0 {Push $w1;
729 0         0 Jmp $end;
730 0         0 };
731 0         0 });
732              
733 0         0 reduceMultiple 1; # Final reductions
734              
735 0         0 checkStackHas 1;
736             IfNe # The stack does not hold exactly one element
737             Then
738 0     0   0 {error("Multiple expressions on stack");
739 0         0 };
740              
741 0         0 Pop r15; # The resulting parse tree
742 0         0 Shr r15, 32; # The offset of the resulting parse tree
743 0         0 SetLabel $end;
744             } # parseExpression
745              
746             sub MatchBrackets(@) #P Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
747 0     0 1 0 {my (@parameters) = @_; # Parameters
748 0 0       0 @_ >= 1 or confess "One or more parameters";
749              
750             my $s = Subroutine
751 0     0   0 {my ($p) = @_; # Parameters
752 0         0 Comment "Match brackets in utf32 text";
753              
754 0         0 my $finish = Label;
755 0         0 PushR xmm0, k7, r10, r11, r12, r13, r14, r15, rsi; # R15 current character address. r14 is the current classification. r13 the last classification code. r12 the stack depth. r11 the number of opening brackets found. r10 address of first utf32 character.
756              
757 0         0 Mov rsi, rsp; # Save stack location so we can use the stack to record the brackets we have found
758 0         0 ClearRegisters r11, r12, r14, r15; # Count the number of brackets and track the stack depth, index of each character. r14 is cleared as well because only its low byte is loaded below yet the whole register is compared and pushed
759 0         0 K(three, 3)->setMaskFirst(k7); # These are the number of bytes that we are going to use for the offsets of brackets: 24 bits of offset limits the size of a program to 2**24 (about 16 million) utf32 characters
760 0         0 $$p{fail} ->getReg(r11); # Clear failure indicator
761 0         0 $$p{opens} ->getReg(r11); # Clear count of opens
762 0         0 $$p{address}->setReg(r10); # Address of first utf32 character
763 0         0 my $w = RegisterSize eax; # Size of a utf32 character
764              
765             $$p{size}->for(sub # Process each utf32 character in the block of memory
766 0         0 {my ($index, $start, $next, $end) = @_;
767 0         0 my $continue = Label;
768              
769 0         0 Mov r14b, "[r10+$w*r15+3]"; # Classification character
770              
771 0         0 Cmp r14, 0x10; # First bracket
772 0         0 Jl $continue; # Less than first bracket
773 0         0 Cmp r14, 0x4f; # Last bracket
774 0         0 Jg $continue; # Greater than last bracket
775              
776 0         0 Test r14, 1; # Zero means that the bracket is an opener
777             IfZ sub # Save an opener then continue
778 0         0 {Push r15; # Save position in input
779 0         0 Push r14; # Save opening code
780 0         0 Inc r11; # Count number of opening brackets
781 0         0 Inc r12; # Number of brackets currently open
782 0         0 Jmp $continue;
783 0         0 };
784 0         0 Cmp r12, 1; # Check that there is a bracket to match on the stack
785             IfLt sub # Nothing on stack
786 0         0 {Not r15; # Minus the offset at which the error occurred so that we can fail at zero
787 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
788 0         0 Jmp $finish; # Return
789 0         0 };
790 0         0 Mov r13, "[rsp]"; # Peek at the opening bracket code which is on top of the stack
791 0         0 Inc r13; # Expected closing bracket
792 0         0 Cmp r13, r14; # Check for match
793             IfNe sub # Mismatch
794 0         0 {Not r15; # Minus the offset at which the error occurred so that we can fail at zero
795 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
796 0         0 Jmp $finish; # Return
797 0         0 };
798 0         0 Pop r13; # The closing bracket matches the opening bracket
799 0         0 Pop r13; # Offset of opener
800 0         0 Dec r12; # Close off bracket sequence
801 0         0 Vpbroadcastq xmm0, r15; # Load offset of closer - the current character
802 0         0 Vmovdqu8 "[r10+$w*r13]\{k7}", xmm0; # Save offset of closer in the code for the opener - the classification is left intact so we still know what kind of bracket we have
803 0         0 Vpbroadcastq xmm0, r13; # Load offset of opener
804 0         0 Vmovdqu8 "[r10+$w*r15]\{k7}", xmm0; # Save offset of opener in the code for the closer - the classification is left intact so we still know what kind of bracket we have
805 0         0 SetLabel $continue; # Continue with next character
806 0         0 Inc r15; # Next character
807 0         0 });
808              
809 0         0 SetLabel $finish;
810 0         0 Mov rsp, rsi; # Restore stack
811 0         0 $$p{opens}->getReg(r11); # Number of brackets opened
812 0         0 PopR;
813 0         0 } [qw(address size fail opens)], name => q(Unisyn::Parse::MatchBrackets);
814              
815 0         0 $s->call(@parameters);
816             } # MatchBrackets
817              
818             sub ClassifyNewLines(@) #P Scan input string looking for opportunities to convert new lines into semi colons: a new line classified as white space becomes a new line semi colon when the item before it is a closing bracket, suffix or variable and the item after it is an opening bracket, prefix or variable.
819 0     0 1 0 {my (@parameters) = @_; # Parameters
820 0 0       0 @_ >= 1 or confess "One or more parameters";
821              
822             my $s = Subroutine
823 0     0   0 {my ($p) = @_; # Parameters
824 0         0 my $current = r15; # Index of the current character
825 0         0 my $middle = r14; # Index of the middle character
826 0         0 my $first = r13; # Index of the first character
827 0         0 my $address = r12; # Address of input string
828 0         0 my $size = r11; # Length of input utf32 string
829 0         0 my($c1, $c2) = (r8."b", r9."b"); # Lexical codes being tested
830              
831 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
832              
833 0         0 $$p{address}->setReg($address); # Address of string
834 0         0 $$p{size} ->setReg($size); # Size of string
835 0         0 Mov $current, 2; Mov $middle, 1; Mov $first, 0; # Start with the first three characters as the first, middle and current positions
  0         0  
  0         0  
836              
837             For # Each character in input string
838 0         0 {my ($start, $end, $next) = @_; # Start, end and next labels
839              
840              
841 0         0 getLexicalCode $c1, $address, $middle; # Lexical code of the middle character
842 0         0 Cmp $c1, $WhiteSpace;
843             IfEq
844             Then
845 0         0 {getAlpha $c1, $address, $middle; # Actual character at the middle position
846              
847 0         0 Cmp $c1, $asciiNewLine;
848             IfEq # Middle character is a insignificant new line and thus could be a semicolon
849             Then
850 0         0 {getLexicalCode $c1, $address, $first; # Lexical code of the item in front of the new line
851              
852             my sub makeSemiColon # Make a new line into a new line semicolon
853 0         0 {putLexicalCode $c2, $address, $middle, $NewLineSemiColon;
854             }
855              
856             my sub check_bpv # Make new line if followed by 'b', 'p' or 'v'
857 0         0 {getLexicalCode $c1, $address, $current; # Lexical code of the item after the new line
858 0         0 Cmp $c1, $OpenBracket;
859              
860             IfEq
861             Then
862 0         0 {makeSemiColon;
863             },
864             Else
865 0         0 {Cmp $c1, $prefix;
866             IfEq
867             Then
868 0         0 {makeSemiColon;
869             },
870             Else
871 0         0 {Cmp $c1, $variable;
872             IfEq
873             Then
874 0         0 {makeSemiColon;
875 0         0 };
876 0         0 };
877 0         0 };
878             }
879              
880 0         0 Cmp $c1, $CloseBracket; # Check first character of sequence
881             IfEq
882             Then
883 0         0 {check_bpv;
884             },
885             Else
886 0         0 {Cmp $c1, $suffix;
887             IfEq
888             Then
889 0         0 {check_bpv;
890             },
891             Else
892 0         0 {Cmp $c1, $variable;
893             IfEq
894             Then
895 0         0 {check_bpv;
896 0         0 };
897 0         0 };
898 0         0 };
899 0         0 };
900 0         0 };
901              
902 0         0 Mov $first, $middle; Mov $middle, $current; # Find next lexical item
  0         0  
903 0         0 getLexicalCode $c1, $address, $current; # Current lexical code
904 0         0 Mov $middle, $current; # NOTE(review): repeats the move into $middle made two statements earlier
905 0         0 Inc $current; # Next possible character
906             For
907 0         0 {my ($start, $end, $next) = @_; # Labels of the inner loop which shadow those of the outer loop
908 0         0 getLexicalCode $c2, $address, $current; # Lexical code of next character
909 0         0 Cmp $c1, $c2;
910 0         0 Jne $end; # Terminate when we are in a different lexical item
911 0         0 } $current, $size;
912 0         0 } $current, $size;
913              
914 0         0 PopR;
915 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyNewLines);
916              
917 0         0 $s->call(@parameters);
918             } # ClassifyNewLines
919              
920             sub ClassifyWhiteSpace(@) #P Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl". The first pass marks significant spaces and new lines as white space, the second pass inverts the marking so that significant white space ends up as ascii and the remaining spaces and new lines end up as white space.
921 0     0 1 0 {my (@parameters) = @_; # Parameters
922 0 0       0 @_ >= 1 or confess "One or more parameters";
923              
924             my $s = Subroutine
925 0         0 {my ($p) = @_; # Parameters
926 0         0 my $eb = r15."b"; # Lexical type of current char
927 0         0 my $s = r14; # State of white space between 'a'
928 0         0 my $S = r13; # State of white space before 'a'
929 0         0 my $cb = r12."b"; # Actual character within alphabet
930 0         0 my $address = r11; # Address of input string
931 0         0 my $index = r10; # Index of current char
932 0         0 my ($w1, $w2) = (r8."b", r9."b"); # Temporary work registers
933              
934             my sub getAlpha($;$) # Load the position of a lexical item in its alphabet from the current character
935 0         0 {my ($register, $indexReg) = @_; # Register to load, optional index register
936 0   0     0 getAlpha $register, $address, $indexReg // $index # Supplied index or default - the operands were previously the other way round which meant that the supplied index was always ignored
937             };
938              
939             my sub getLexicalCode() # Load the lexical code of the current character in memory into the current character
940 0         0 {getLexicalCode $eb, $address, $index; # Supplied index or default
941             };
942              
943             my sub putLexicalCode($;$) # Put the specified lexical code into the current character in memory.
944 0         0 {my ($code, $indexReg) = @_; # Code, optional index register
945 0   0     0 putLexicalCode $w1, $address, ($indexReg//$index), $code;
946             };
947              
948 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
949              
950 0         0 $$p{address}->setReg($address); # Address of string
951 0         0 Mov $s, -1; Mov $S, -1; Mov $index, 0; # Initial states, position
  0         0  
  0         0  
952              
953             $$p{size}->for(sub # Each character in expression
954 0         0 {my ($indexVariable, $start, $next, $end) = @_;
955              
956 0         0 $indexVariable->setReg($index);
957 0         0 getLexicalCode; # Current lexical code
958              
959             AndBlock # Trap space before new line and detect new line after ascii
960 0         0 {my ($end, $start) = @_;
961 0         0 Cmp $index, 0; Je $end; # Start beyond the first character so we can look back one character.
  0         0  
962 0         0 Cmp $eb, $Ascii; Jne $end; # Current is ascii
  0         0  
963              
964 0         0 Mov $w1, "[$address+$indexScale*$index-$indexScale+$lexCodeOffset]"; # Previous lexical code
965 0         0 Cmp $w1, $Ascii; Jne $end; # Previous is ascii
  0         0  
966              
967 0         0 if (1) # Check for 's' followed by 'n' and 'a' followed by 'n'
968 0         0 {Mov $w1, "[$address+$indexScale*$index-$indexScale]"; # Previous character
969 0         0 getAlpha $w2; # Current character
970              
971 0         0 Cmp $w1, $asciiSpace; # Check for space followed by new line
972             IfEq
973             Then
974 0         0 {Cmp $w2, $asciiNewLine;
975             IfEq # Disallow 's' followed by 'n'
976             Then
977 0         0 {PrintErrStringNL "Space detected before new line at index:";
978 0         0 PrintErrRegisterInHex $index;
979 0         0 PrintErrTraceBack;
980 0         0 Exit(1);
981 0         0 };
982 0         0 };
983              
984 0         0 Cmp $w1, $asciiSpace; Je $end; # Check for 'a' followed by 'n'
  0         0  
985 0         0 Cmp $w1, $asciiNewLine; Je $end; # Current is 'a' but not 'n' or 's'
  0         0  
986 0         0 Cmp $w2, $asciiNewLine; Jne $end; # Current is 'n'
  0         0  
987              
988 0         0 putLexicalCode $WhiteSpace; # Mark new line as significant
989             }
990 0         0 };
991              
992             AndBlock # Spaces and new lines between other ascii
993 0         0 {my ($end, $start) = @_;
994 0         0 Cmp $s, -1;
995             IfEq # Looking for opening ascii
996             Then
997 0         0 {Cmp $eb, $Ascii; Jne $end; # Not ascii
  0         0  
998 0         0 getAlpha $cb; # Current character
999 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
1000 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
1001             IfEq # NOTE(review): this tests the flags of the space comparison directly after the jump taken on equality, so it looks as if the body can never be executed and $s is never set - confirm the intended condition
1002             Then
1003 0         0 {Mov $s, $index; Inc $s; # Ascii not space nor new line
  0         0  
1004 0         0 };
1005 0         0 Jmp $end;
1006             },
1007             Else # Looking for closing ascii
1008 0         0 {Cmp $eb, $Ascii;
1009             IfNe # Not ascii
1010             Then
1011 0         0 {Mov $s, -1;
1012 0         0 Jmp $end
1013 0         0 };
1014 0         0 getAlpha $cb; # Current character
1015 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
1016 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
1017              
1018             For # Move over spaces and new lines between two ascii characters that are neither of new line or space
1019 0         0 {my ($start, $end, $next) = @_;
1020 0         0 getAlpha $cb, $s; # Check for 's' or 'n'
1021 0         0 Cmp $cb, $asciiSpace;
1022             IfEq
1023             Then
1024 0         0 {putLexicalCode $WhiteSpace, $s; # Mark as significant white space.
1025 0         0 Jmp $next;
1026 0         0 };
1027 0         0 Cmp $cb, $asciiNewLine;
1028             IfEq
1029             Then
1030 0         0 {putLexicalCode $WhiteSpace, $s; # Mark as significant new line - at the position being scanned, as for the space above, rather than at the current character
1031 0         0 Jmp $next;
1032 0         0 };
1033 0         0 } $s, $index;
1034              
1035 0         0 Mov $s, $index; Inc $s;
  0         0  
1036 0         0 };
1037 0         0 };
1038              
1039             AndBlock # Note: 's' preceding 'a' are significant
1040 0         0 {my ($end, $start) = @_;
1041 0         0 Cmp $S, -1;
1042             IfEq # Looking for 's'
1043             Then
1044 0         0 {Cmp $eb, $Ascii; # Not 'a'
1045             IfNe
1046             Then
1047 0         0 {Mov $S, -1;
1048 0         0 Jmp $end
1049 0         0 };
1050 0         0 getAlpha $cb; # Actual character in alphabet
1051 0         0 Cmp $cb, $asciiSpace; # Space
1052             IfEq
1053             Then
1054 0         0 {Mov $S, $index;
1055 0         0 Jmp $end;
1056 0         0 };
1057             },
1058             Else # Looking for 'a'
1059 0         0 {Cmp $eb, $Ascii; # Not 'a'
1060             IfNe
1061             Then
1062 0         0 {Mov $S, -1;
1063 0         0 Jmp $end
1064 0         0 };
1065 0         0 getAlpha $cb; # Actual character in alphabet
1066 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip 's'
  0         0  
1067              
1068 0         0 Cmp $cb, $asciiNewLine;
1069             IfEq # New lines prevent 's' from preceding 'a'
1070             Then
1071 0         0 {Mov $S, -1; # Abandon the run of spaces being tracked by this block - previously this reset $s, the state of the preceding block
1072 0         0 Jmp $end
1073 0         0 };
1074              
1075             For # Move over spaces to non space ascii
1076 0         0 {my ($start, $end, $next) = @_;
1077 0         0 putLexicalCode $WhiteSpace, $S; # Mark space as significant
1078 0         0 } $S, $index;
1079 0         0 Mov $S, -1; # Look for next possible space
1080             }
1081 0         0 };
  0         0  
1082 0         0 });
1083              
1084             $$p{size}->for(sub # Invert white space so that significant white space becomes ascii and the remainder is ignored
1085 0         0 {my ($indexVariable, $start, $next, $end) = @_;
1086              
1087 0         0 $indexVariable->setReg($index);
1088 0         0 getLexicalCode; # Current lexical code
1089              
1090             AndBlock # Invert non significant white space
1091 0         0 {my ($end, $start) = @_;
1092 0         0 Cmp $eb, $Ascii;
1093 0         0 Jne $end; # Ascii
1094              
1095 0         0 getAlpha $cb; # Actual character in alphabet
1096 0         0 Cmp $cb, $asciiSpace;
1097             IfEq
1098             Then
1099 0         0 {putLexicalCode $WhiteSpace; # Mark space as not significant
1100 0         0 Jmp $next;
1101 0         0 };
1102 0         0 Cmp $cb, $asciiNewLine;
1103             IfEq
1104             Then
1105 0         0 {putLexicalCode $WhiteSpace; # Mark new line as not significant
1106 0         0 Jmp $next;
1107 0         0 };
1108 0         0 };
1109              
1110             AndBlock # Mark significant white space
1111 0         0 {my ($end, $start) = @_;
1112 0         0 Cmp $eb, $WhiteSpace; Jne $end; # Not significant white space
  0         0  
1113 0         0 putLexicalCode $Ascii; # Mark as ascii
1114 0         0 };
1115 0         0 });
1116              
1117 0         0 PopR;
1118 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyWhiteSpace);
1119              
1120 0         0 $s->call(@parameters);
1121             } # ClassifyWhiteSpace
1122              
1123             sub reload($$) #P Reload the variables associated with a parse: the quarks always and the operators only if the parse has them.
1124 0     0 1 0 {my ($parse, $parameters) = @_; # Parse, hash of variable parameters
1125 0 0       0 @_ >= 1 or confess "One or more parameters";
1126              
1127             $parse->quarks->reload (arena => $$parameters{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
1128             array => $$parameters{numbersToStringsFirst},
1129 0         0 tree => $$parameters{stringsToNumbersFirst});
1130              
1131             $parse->operators->reload(arena => $$parameters{bs}, # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
1132             array => $$parameters{opNumbersToStringsFirst},
1133 0 0       0 tree => $$parameters{opStringsToNumbersFirst}) if $parse->operators; # Operators are optional so only reload them when they have been supplied
1134             }
1135              
1136             sub parseUtf8($@) #P Parse a unisyn expression encoded as utf8 and return the parse tree: convert to utf32, classify the characters, match brackets, classify white space and new lines, then parse the classified string.
1137 0     0 1 0 {my ($parse, @parameters) = @_; # Parse, parameters
1138 0 0       0 @_ >= 1 or confess "One or more parameters";
1139              
1140             my $s = Subroutine
1141 0         0 {my ($p, $s) = @_; # Parameters
1142 0         0 $ParseUtf8SubDef = $s; # Save the sub definition globally so that its parameter list can be reused when calling other subroutines.
1143              
1144 0         0 $parse->reload($p); # Reload the parse description
1145 0 0       0 PrintErrStringNL "ParseUtf8" if $debug;
1146              
1147 0         0 PushR $parseStackBase, map {"r$_"} 8..15; # Save the parse stack base register and r8 through r15
  0         0  
1148 0         0 PushZmm 0..1; PushMask 0..2; # Used to hold arena and classifiers. Zmm0 is used to as a short string to quark the lexical item strings.
  0         0  
1149              
1150 0         0 my $source32 = $$p{source32};
1151 0         0 my $sourceSize32 = $$p{sourceSize32};
1152 0         0 my $sourceLength32 = $$p{sourceLength32};
1153              
1154             ConvertUtf8ToUtf32 u8 => $$p{address}, size8 => $$p{size}, # Convert to utf32
1155 0         0 u32 => $source32, size32 => $sourceSize32,
1156             count => $sourceLength32;
1157              
1158             my sub PrintUtf32($$) # Print a utf32 string in hexadecimal
1159 0         0 {my ($size, $address) = @_; # Variable size, variable address
1160 0         0 $address->printErrMemoryInHexNL($size);
1161             }
1162              
1163 0 0       0 if ($debug)
1164 0         0 {PrintErrStringNL "After conversion from utf8 to utf32";
1165 0         0 $sourceSize32 ->errNL("Output Length: "); # Write output length
1166 0         0 PrintUtf32($sourceSize32, $source32); # Print utf32
1167             }
1168              
1169 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{lexicalLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1170 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{lexicalHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1171              
1172 0         0 ClassifyWithInRangeAndSaveOffset address=>$source32, size=>$sourceLength32; # Alphabetic classification
1173 0 0       0 if ($debug)
1174 0         0 {PrintErrStringNL "After classification into alphabet ranges";
1175 0         0 PrintUtf32($sourceSize32, $source32); # Print classified utf32
1176             }
1177              
1178 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{bracketsLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1179 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{bracketsHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1180              
1181 0         0 ClassifyWithInRange address=>$source32, size=>$sourceLength32; # Bracket classification
1182 0 0       0 if ($debug)
1183 0         0 {PrintErrStringNL "After classification into brackets";
1184 0         0 PrintUtf32($sourceSize32, $source32); # Print classified brackets
1185             }
1186              
1187 0         0 my $opens = V(opens, -1); # Receives the number of opening brackets found
1188 0         0 MatchBrackets address=>$source32, size=>$sourceLength32, $opens, $$p{fail}; # Match brackets
1189 0 0       0 if ($debug)
1190 0         0 {PrintErrStringNL "After bracket matching";
1191 0         0 PrintUtf32($sourceSize32, $source32); # Print matched brackets
1192             }
1193              
1194 0         0 ClassifyWhiteSpace address=>$source32, size=>$sourceLength32; # Classify white space
1195 0 0       0 if ($debug)
1196 0         0 {PrintErrStringNL "After white space classification";
1197 0         0 PrintUtf32($sourceSize32, $source32);
1198             }
1199              
1200 0         0 ClassifyNewLines address=>$source32, size=>$sourceLength32; # Classify new lines
1201 0 0       0 if ($debug)
1202 0         0 {PrintErrStringNL "After classifying new lines";
1203 0         0 PrintUtf32($sourceSize32, $source32);
1204             }
1205              
1206 0         0 $$p{source32} ->setReg($start); # Start of expression string after it has been classified
1207 0         0 $$p{sourceLength32}->setReg($size); # Number of characters in the expression
1208 0         0 Mov $parseStackBase, rsp; # Set base of parse stack
1209              
1210 0         0 parseExpression; # Parse the expression
1211              
1212 0         0 $$p{parse}->getReg(r15); # Save the offset of the resulting parse tree which parseExpression returns in r15
1213 0         0 Mov rsp, $parseStackBase; # Remove parse stack
1214              
1215 0 0       0 $$p{parse}->errNL if $debug;
1216              
1217 0         0 PopMask; PopZmm; PopR; # Restore the masks, zmm registers and general purpose registers saved on entry
  0         0  
  0         0  
1218              
1219             }
1220 0         0 [qw(bs address size parse fail source32 sourceSize32 sourceLength32),
1221             qw(numbersToStringsFirst stringsToNumbersFirst),
1222             qw(opNumbersToStringsFirst opStringsToNumbersFirst)],
1223             name => q(Unisyn::Parse::parseUtf8);
1224              
1225 0         0 my $op = $parse->operators; # The operator methods if supplied
1226 0         0 my $zero = K(zero, 0); # Stands in for the operator parameters when no operators have been supplied
1227              
1228 0 0       0 $s->call # Parameterize the parse
    0          
1229             (bs => $parse->arena->bs,
1230             address => $parse->address8,
1231             fail => $parse->fails,
1232             parse => $parse->parse,
1233             size => $parse->size8,
1234             source32 => $parse->source32,
1235             sourceLength32 => $parse->sourceLength32,
1236             sourceSize32 => $parse->sourceSize32,
1237             numbersToStringsFirst => $parse->quarks->numbersToStrings->first,
1238             stringsToNumbersFirst => $parse->quarks->stringsToNumbers->first,
1239             opNumbersToStringsFirst => $op ? $op->numbersToStrings->first : $zero,
1240             opStringsToNumbersFirst => $op ? $op->stringsToNumbers->first : $zero,
1241             );
1242             } # parseUtf8
1243              
1244             #D1 Traverse # Traverse the parse tree
1245              
1246             sub traverseParseTree($) # Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
1247 0     0 1 0 {my ($parse) = @_; # Parse tree
1248              
1249             my $s = Subroutine # Print a tree
1250 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1251 0         0 my $t = Nasm::X86::DescribeTree (arena=>$$p{bs}, first=>$$p{first}); # Tree definition
1252 0         0 $t->find(K(key, $opType)); # The lexical type of the element - normally a term
1253              
1254             If $t->found == 0, # Not found lexical type of element
1255             Then
1256 0         0 {PrintOutString "No type for node";
1257 0         0 Exit(1);
1258 0         0 };
1259              
1260             If $t->data != $term, # Expected a term
1261             Then
1262 0         0 {PrintOutString "Expected a term";
1263 0         0 Exit(1);
1264 0         0 };
1265              
1266 0         0 my $operands = V(operands); # Number of operands
1267 0         0 $t->find(K(key, $opCount)); # Key 1 tells us the number of operands
1268             If $t->found > 0, # Found key 1
1269             Then
1270 0         0 {$operands->copy($t->data); # Number of operands
1271             },
1272             Else
1273 0         0 {PrintOutString "Expected at least one operand";
1274 0         0 Exit(1);
1275 0         0 };
1276              
1277             $operands->for(sub # Each operand
1278 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1279 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1280 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1281 0         0 $t->find($i+$lexItemOffset); my $off = V(key) ->copy($t->data); # Offset of first block of sub tree
  0         0  
1282              
1283             If $lex == $term, # Term
1284             Then
1285 0         0 {$s->call($$p{bs}, first => $off); # Traverse sub tree referenced by offset field
1286 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1287             },
1288 0         0 });
  0         0  
1289              
1290 0         0 $t->find(K(key, $opSub)); # The subroutine for the term
1291             If $t->found > 0, # Found subroutine for term
1292             Then # Call subroutine for this term
1293             {#PushR r15, zmm0;
1294             my $p = Subroutine # Prototype subroutine to establish parameter list
1295 0         0 {} [qw(tree call)], with => $s,
1296             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::prototype";
1297              
1298             my $d = Subroutine # Dispatcher
1299 0         0 {my ($q, $sub) = @_;
1300 0         0 $p->dispatchV($$q{call}, r15);
1301 0         0 } [], with => $p,
1302             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::dispatch";
1303              
1304             If $t->data > 0,
1305             Then
1306 0         0 {$d->call(tree => $t->first, call => $t->data) # Call sub associated with the lexical item
1307 0         0 };
1308             # my $p = Subroutine # Subroutine
1309             # {my ($parameters) = @_; # Parameters
1310             # $$parameters{call}->setReg(r15);
1311             # Call r15;
1312             # } [qw(tree call)], with => $s,
1313             # name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem";
1314             #
1315             # my $l = RegisterSize rax;
1316             # $$p{bs} ->putQIntoZmm(0, 0*$l, r15);
1317             # $$p{first}->putQIntoZmm(0, 1*$l, r15);
1318             # $t->data ->setReg(r15);
1319             # Call r15;
1320             # #PopR;
1321 0         0 };
1322              
1323 0         0 } [qw(bs first)], name => "Nasm::X86::Tree::traverseParseTree";
1324              
1325 0         0 PushR r15, zmm0;
1326 0         0 $s->call($parse->arena->bs, first => $parse->parse);
1327 0         0 PopR;
1328              
1329 0         0 $a
1330             } # traverseParseTree
1331              
1332             sub makeExecutionChain($) # Traverse the parse tree in post order to create an execution chain.
1333 0     0 1 0 {my ($parse) = @_; # Parse tree
1334 0         0 my $W = $parse->width; # Width of entries in exec chain blocks
1335              
1336             my $s = Subroutine # Place each term of a tree on the execution chain
1337 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1338 0         0 my $t = Nasm::X86::DescribeTree (arena=>$$p{bs}, first=>$$p{first}); # Tree definition
1339 0         0 $t->find(K(key, $opType)); # The lexical type of the element - normally a term
1340              
1341             If $t->found == 0, # Not found lexical type of element
1342             Then
1343 0         0 {PrintOutString "No type for node";
1344 0         0 Exit(1);
1345 0         0 };
1346              
1347             If $t->data != $term, # Expected a term
1348             Then
1349 0         0 {PrintOutString "Expected a term";
1350 0         0 Exit(1);
1351 0         0 };
1352              
1353 0         0 ClearRegisters zmm0; # Place term on execution chain
1354 0         0 $$p{chain}->putDIntoZmm(0, $execChainNext * $W, r15); # Offset of previous block
1355 0         0 $$p{first}->putDIntoZmm(0, $execChainTerm * $W, r15); # Save term offset
1356              
1357 0         0 $t->find(K(key, $opSub)); # The subroutine for the term
1358             If $t->found > 0, # Found subroutine for term
1359             Then # Call subroutine for this term
1360 0         0 {$t->data->putDIntoZmm(0, $execChainSub * $W, r15); # Save operator
1361 0         0 };
1362              
1363 0         0 my $block = $parse->arena->allocZmmBlock; # Create exec chain element
1364 0         0 $parse->arena->putZmmBlock($block, 0, r14, r15); # Save exec chain element
1365 0         0 $$p{chain}->copy($block); # Save address of block
1366              
1367 0         0 my $operands = V(operands); # Number of operands
1368 0         0 $t->find(K(key, $opCount)); # Key 1 tells us the number of operands
1369             If $t->found > 0, # Found key 1
1370             Then
1371 0         0 {$operands->copy($t->data); # Number of operands
1372             },
1373             Else
1374 0         0 {PrintOutString "Expected at least one operand";
1375 0         0 Exit(1);
1376 0         0 };
1377              
1378             $operands->for(sub # Each operand
1379 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1380 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1381 0         0 $t->find($i+$lexItemType); my $lex = $t->data->clone('key'); # Lexical type
  0         0  
1382 0         0 $t->find($i+$lexItemOffset); my $off = $t->data->clone('key'); # Offset of first block of sub tree
  0         0  
1383              
1384             If $lex == $term, # Term
1385             Then
1386 0         0 {$s->call($$p{bs}, first => $off, chain => $$p{chain}); # Traverse sub tree referenced by offset field
1387 0         0 $t->first->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1388             },
1389 0         0 });
  0         0  
1390              
1391 0         0 } [qw(bs first chain)], name => "Nasm::X86::Tree::makeExecutionChain";
1392              
1393 0         0 PushR r14, r15, zmm0;
1394              
1395 0         0 $s->call($parse->arena->bs, first => $parse->parse, my $chain = V('chain',0));# Construct execution chain
1396              
1397             If $chain > 0, # Reverse the execution chain and save its start in the parse tree
1398             Then
1399 0     0   0 {my $A = $parse->arena;
1400 0         0 my $a = V('zero', 0);
1401 0         0 my $b = $chain->clone;
1402              
1403             ForEver # Loop through exec chain reversing each link
1404 0         0 {my ($start, $end) = @_;
1405 0         0 $A ->getZmmBlock($b, 0, r14, r15);
1406 0         0 my $c = Nasm::X86::getDFromZmm(0, $execChainNext, r15);
1407 0         0 $a->putDIntoZmm(0, $execChainNext);
1408 0         0 $A->putZmmBlock($b, 0, r14, r15);
1409              
1410 0         0 If $c == 0, Then {Jmp $end};
  0         0  
1411 0         0 $a->copy($b);
1412 0         0 $b->copy($c);
1413 0         0 };
1414 0         0 my $t = $parse->arena->DescribeTree(first => $parse->parse); # Parse tree
1415 0         0 $t->insert(V('key', $opChain), $b); # Save start of chain
1416 0         0 };
1417             #
1418              
1419 0         0 PopR;
1420              
1421 0         0 $a
1422             } # makeExecutionChain
1423              
1424             sub printExecChain($) #P Print the execute chain for a parse.
1425 0     0 1 0 {my ($parse) = @_; # Parse tree
1426 0         0 my $t = $parse->arena->DescribeTree(first=>$parse->parse);
1427 0         0 $t->find(V('key', $opChain)); # Start of chain
1428 0         0 my $p = $t->data->clone;
1429              
1430             ForEver
1431 0     0   0 {my ($start, $end) = @_; # Start of loop label, end of loop label
1432 0         0 If $p == 0, Then {Jmp $end}; # End of chain
  0         0  
1433 0         0 $parse->arena->getZmmBlock($p, 0, r14, r15);
1434 0         0 $p->out("offset: ", " : ");
1435 0         0 PrintOutRegisterInHex zmm0;
1436 0         0 $p->copy(Nasm::X86::getDFromZmm(0, $execChainNext, r15));
1437 0         0 };
1438             }
1439              
1440             #D1 Print # Print a parse tree
1441              
1442             sub printLexicalItem($$$$) #P Print the utf8 string corresponding to a lexical item at a variable offset.
1443 0     0 1 0 {my ($parse, $source32, $offset, $size) = @_; # Parse tree, address of utf32 source representation, offset to lexical item in utf32, size in utf32 chars of item
1444 0         0 my $t = $parse->arena->DescribeTree;
1445              
1446             my $s = Subroutine
1447 0     0   0 {my ($p, $s) = @_; # Parameters
1448 0         0 PushR r12, r13, r14, r15;
1449              
1450 0         0 $$p{source32}->setReg(r14);
1451 0         0 $$p{offset} ->setReg(r15);
1452 0         0 Lea r13, "[r14+4*r15]"; # Address lexical item
1453 0         0 Mov eax, "[r13]"; # First lexical item clearing rax
1454 0         0 Shr rax, 24; # First lexical item type in lowest byte and all else cleared
1455              
1456 0         0 my $success = Label;
1457 0         0 my $print = Label;
1458              
1459 0         0 Cmp rax, $bracketsBase; # Test for brackets
1460             IfGe
1461             Then
1462 0         0 {my $o = $Lex->{bracketsOpen}; # Opening brackets
1463 0         0 my $c = $Lex->{bracketsClose}; # Closing brackets
1464 0         0 my $O = Rutf8 map {($_, chr(0))} @$o; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1465 0         0 my $C = Rutf8 map {($_, chr(0))} @$c; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1466 0         0 Mov r14, $O; # Address open bracket
1467 0         0 Mov r15, rax; # The bracket number
1468 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Index to bracket
1469 0         0 PrintOutUtf8Char; # Print opening bracket
1470 0         0 Mov r14, $C; # Address close bracket
1471 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Closing brackets occupy 3 bytes
1472 0         0 PrintOutUtf8Char; # Print closing bracket
1473 0         0 Jmp $success;
1474 0         0 };
1475              
1476 0         0 Mov r12, -1; # Alphabet to use
1477 0         0 Cmp rax, $variable; # Test for variable
1478             IfEq
1479             Then
1480 0         0 {my $b = $Lex->{alphabetsOrdered}{variable}; # Load variable alphabet in dwords
1481 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1482 0         0 my $a = Rd @b;
1483 0         0 Mov r12, $a;
1484 0         0 Jmp $print;
1485 0         0 };
1486              
1487 0         0 Cmp rax, $assign; # Assign operator
1488             IfEq
1489             Then
1490 0         0 {my $b = $Lex->{alphabetsOrdered}{assign};
1491 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1492 0         0 my $a = Rd @b;
1493 0         0 Mov r12, $a;
1494 0         0 Jmp $print;
1495 0         0 };
1496              
1497 0         0 Cmp rax, $dyad; # Dyad
1498             IfEq
1499             Then
1500 0         0 {my $b = $Lex->{alphabetsOrdered}{dyad};
1501 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1502 0         0 my $a = Rd @b;
1503 0         0 Mov r12, $a;
1504 0         0 Jmp $print;
1505 0         0 };
1506              
1507 0         0 Cmp rax, $Ascii; # Ascii
1508             IfEq
1509             Then
1510 0         0 {my $b = $Lex->{alphabetsOrdered}{Ascii};
1511 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1512 0         0 my $a = Rd @b;
1513 0         0 Mov r12, $a;
1514 0         0 Jmp $print;
1515 0         0 };
1516              
1517 0         0 Cmp rax, $prefix; # Prefix
1518             IfEq
1519             Then
1520 0         0 {my $b = $Lex->{alphabetsOrdered}{prefix};
1521 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1522 0         0 my $a = Rd @b;
1523 0         0 Mov r12, $a;
1524 0         0 Jmp $print;
1525 0         0 };
1526              
1527 0         0 Cmp rax, $suffix; # Suffix
1528             IfEq
1529             Then
1530 0         0 {my $b = $Lex->{alphabetsOrdered}{suffix};
1531 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1532 0         0 my $a = Rd @b;
1533 0         0 Mov r12, $a;
1534 0         0 Jmp $print;
1535 0         0 };
1536              
1537 0         0 PrintErrTraceBack; # Unknown lexical type
1538 0         0 PrintErrStringNL "Alphabet not found for unexpected lexical item";
1539 0         0 PrintErrRegisterInHex rax;
1540 0         0 Exit(1);
1541              
1542 0         0 SetLabel $print; # Decoded
1543              
1544             $$p{size}->for(sub # Write each letter out from its position in the utf32 source
1545 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1546 0         0 $index->setReg(r14); # Index of the letter within the lexical item
1547 0         0 ClearRegisters r15; # Next instruction does not clear the entire register
1548 0         0 Mov r15b, "[r13+4*r14]"; # Load alphabet offset from the low byte of the utf32 source entry
1549 0         0 Shl r15, 2; # Each letter is 4 bytes wide in utf8
1550 0         0 Lea rax, "[r12+r15]"; # Address alphabet letter as utf8
1551 0         0 PrintOutUtf8Char; # Print utf8 character
1552 0         0 });
1553              
1554 0         0 SetLabel $success; # Done
1555              
1556 0         0 PopR;
1557 0         0 } [qw(offset source32 size)],
1558             name => q(Unisyn::Parse::printLexicalItem);
1559              
1560 0         0 $s->call(offset => $offset, source32 => $source32, size => $size);
1561             }
1562              
1563             sub print($) # Print a parse tree.
1564 0     0 1 0 {my ($parse) = @_; # Parse tree
1565 0         0 my $t = $parse->arena->DescribeTree;
1566              
1567 0         0 PushR my ($depthR) = (r12); # Recursion depth
1568              
1569             my $b = Subroutine # Print the spacing blanks to offset sub trees
1570             {V(loop, $depthR)->for(sub
1571 0         0 {PrintOutString " ";
1572 0     0   0 });
1573 0         0 } [], name => "Nasm::X86::Tree::dump::spaces";
1574              
1575             my $s = Subroutine # Print a tree
1576 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1577              
1578 0         0 my $B = $$p{bs};
1579              
1580 0         0 $t->address->copy($$p{bs});
1581 0         0 $t->first ->copy($$p{first});
1582 0         0 $t->find(K(key, 0)); # Key 0 tells us the type of the element - normally a term
1583              
1584             If $t->found == 0, # Not found key 0
1585             Then
1586 0         0 {PrintOutString "No type for node";
1587 0         0 Exit(1);
1588 0         0 };
1589              
1590             If $t->data != $term, # Expected a term
1591             Then
1592 0         0 {PrintOutString "Expected a term";
1593 0         0 Exit(1);
1594 0         0 };
1595              
1596 0         0 my $operands = V(operands); # Number of operands
1597 0         0 $t->find(K(key, 1)); # Key 1 tells us the number of operands
1598             If $t->found > 0, # Found key 1
1599             Then
1600 0         0 {$operands->copy($t->data); # Number of operands
1601             },
1602             Else
1603 0         0 {PrintOutString "Expected at least one operand";
1604 0         0 Exit(1);
1605 0         0 };
1606              
1607             $operands->for(sub # Each operand
1608 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1609 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1610 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1611 0         0 $t->find($i+$lexItemOffset); my $off = V(data)->copy($t->data); # Offset in source
  0         0  
1612 0         0 $t->find($i+$lexItemLength); my $len = V(data)->copy($t->data); # Length in source
  0         0  
1613              
1614 0         0 $b->call; # Indent
1615              
1616             If $lex == $term, # Term
1617             Then
1618 0         0 {PrintOutStringNL "Term";
1619 0         0 Inc $depthR; # Increase indentation for sub terms
1620 0         0 $s->call($B, first => $off, $$p{source32}); # Print sub tree referenced by offset field
1621 0         0 Dec $depthR; # Restore existing indentation
1622 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1623             },
1624              
1625 0         0 Ef {$lex == $semiColon} # Semicolon
1626             Then
1627 0         0 {PrintOutStringNL "Semicolon";
1628             },
1629              
1630             Else
1631             {If $lex == $variable, # Variable
1632             Then
1633 0         0 {PrintOutString "Variable: ";
1634             },
1635              
1636 0         0 Ef {$lex == $assign} # Assign
1637             Then
1638 0         0 {PrintOutString "Assign: ";
1639             },
1640              
1641 0         0 Ef {$lex == $prefix} # Prefix
1642             Then
1643 0         0 {PrintOutString "Prefix: ";
1644             },
1645              
1646 0         0 Ef {$lex == $suffix} # Suffix
1647             Then
1648 0         0 {PrintOutString "Suffix: ";
1649             },
1650              
1651 0         0 Ef {$lex == $dyad} # Dyad
1652             Then
1653 0         0 {PrintOutString "Dyad: ";
1654             },
1655              
1656 0         0 Ef {$lex == $Ascii} # Ascii
1657             Then
1658 0         0 {PrintOutString "Ascii: ";
1659             },
1660              
1661             Else # Brackets
1662 0         0 {PrintOutString "Brackets: ";
1663 0         0 };
1664              
1665 0         0 $parse->printLexicalItem($$p{source32}, $off, $len); # Print the variable name
1666 0         0 PrintOutNL;
1667 0         0 };
1668              
1669             If $index == 0, # Operator followed by indented operands
1670             Then
1671 0         0 {Inc $depthR;
1672 0         0 };
1673 0         0 });
1674              
1675 0         0 Dec $depthR; # Reset indentation after operands
1676 0         0 } [qw(bs first source32)], name => "Nasm::X86::Tree::print";
1677              
1678 0         0 ClearRegisters $depthR; # Depth starts at zero
1679              
1680 0         0 $s->call($parse->arena->bs, first => $parse->parse, $parse->source32);
1681              
1682 0         0 PopR;
1683             } # print
1684              
1685             sub dumpParseTree($) # Dump the parse tree.
1686 0     0 1 0 {my ($parse) = @_; # Parse tree
1687 0         0 my $t = $parse->arena->DescribeTree;
1688 0         0 $t->first->copy($parse->parse);
1689 0         0 $t->dump;
1690             }
1691              
1692             #D1 Execute # Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
1693              
1694             sub lexToSub($$$$) # Map a lexical item to a processing subroutine.
1695 0     0 1 0 {my ($parse, $alphabet, $op, $sub) = @_; # Parse tree, the alphabet name, the operator name in that alphabet, subroutine definition
1696 0         0 my $a = &lexicalData->{alphabetsOrdered}{$alphabet}; # Alphabet
1697 0         0 my $n = $$Lex{lexicals}{$alphabet}{number}; # Number of lexical type
1698 0         0 my %i = map {$$a[$_]=>$_} keys @$a;
  0         0  
1699 0         0 my @b = ($n, map {$i{ord $_}} split //, $op); # Bytes representing the operator name
  0         0  
1700 0         0 my $s = join '', map {chr $_} @b; # String representation
  0         0  
1701 0         0 $parse->operators->putSub($s, $sub); # Add the string, subroutine combination to the sub quarks
1702             }
1703              
1704             sub dyad($$$) # Define a method for a dyadic operator.
1705 0     0 1 0 {my ($parse, $text, $sub) = @_; # Sub quarks, the name of the operator as a utf8 string, associated subroutine definition
1706 0         0 $parse->lexToSub("dyad", $text, $sub);
1707             }
1708              
1709             sub assign($$$) # Define a method for an assign operator.
1710 0     0 1 0 {my ($parse, $text, $sub) = @_; # Sub quarks, the name of the operator as a utf8 string, associated subroutine definition
1711 0         0 $parse->lexToSub("assign", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1712             }
1713              
1714             sub prefix($$$) # Define a method for a prefix operator.
1715 0     0 1 0 {my ($parse, $text, $sub) = @_; # Sub quarks, the name of the operator as a utf8 string, associated subroutine definition
1716 0         0 $parse->lexToSub("prefix", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1717             }
1718              
1719             sub suffix($$$) # Define a method for a suffix operator.
1720 0     0 1 0 {my ($parse, $text, $sub) = @_; # Sub quarks, the name of the operator as a utf8 string, associated subroutine definition
1721 0         0 my $n = $$Lex{lexicals}{variable}{number}; # Lexical number of a variable
1722 0         0 $parse->operators->putSub(chr($n), $sub); # Add the variable subroutine to the sub quarks
1723             }
1724              
1725              
1726             sub ascii($$) # Define a method for ascii text.
1727 0     0 1 0 {my ($parse, $sub) = @_; # Sub quarks, associated subroutine definition
1728 0         0 my $n = $$Lex{lexicals}{Ascii}{number}; # Lexical number of ascii
1729 0         0 $parse->operators->putSub(chr($n), $sub); # Add the ascii subroutine to the sub quarks
1730             }
1731              
1732             sub semiColon($$) # Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
1733 0     0 1 0 {my ($parse, $sub) = @_; # Sub quarks, associated subroutine definition
1734 0         0 my $n = $$Lex{lexicals}{semiColon}{number}; # Lexical number of semicolon
1735 0         0 $parse->operators->putSub(chr($n), $sub); # Add the semicolon subroutine to the sub quarks
1736 0         0 my $N = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semi colon
1737 0         0 $parse->operators->putSub(chr($N), $sub); # Add the semicolon subroutine to the sub quarks
1738             }
1739              
1740             sub variable($$) # Define a method for a variable.
1741 0     0 1 0 {my ($parse, $sub) = @_; # Sub quarks, associated subroutine definition
1742 0         0 my $n = $$Lex{lexicals}{variable}{number}; # Lexical number of a variable
1743 0         0 $parse->operators->putSub(chr($n), $sub); # Add the variable subroutine to the sub quarks
1744             }
1745              
1746             sub bracket($$$) # Define a method for a bracket operator.
1747 0     0 1 0 {my ($parse, $open, $sub) = @_; # Sub quarks, opening parenthesis, associated subroutine
1748 0         0 my $l = &lexicalData;
1749 0         0 my $s = join '', sort $l->{bracketsOpen}->@*;#, $l->{bracketsClose}->@*; # Bracket alphabet
1750 0         0 my $b = index($s, $open);
1751 0 0       0 $b < 0 and confess "No such bracket: $open";
1752 0         0 my $n = $$Lex{lexicals}{OpenBracket}{number}; # Lexical number of open bracket
1753 0         0 $parse->operators->putSub(chr($n).chr($b+1+$l->{bracketsBase}), $sub); # Why plus one? # Add the brackets subroutine to the sub quarks
1754             }
1755              
1756             #D1 Alphabets # Translate between alphabets.
1757              
1758             sub showAlphabet($) #P Show an alphabet.
1759 0     0 1 0 {my ($alphabet) = @_; # Alphabet name
1760 0         0 my $out;
1761 0         0 my $lex = &lexicalData;
1762 0         0 my $abc = $lex->{alphabetsOrdered}{$alphabet};
1763 0         0 for my $a(@$abc)
1764 0         0 {$out .= chr($a);
1765             }
1766             $out
1767 0         0 }
1768              
1769             sub asciiToAssignLatin($) # Translate ascii to the corresponding letters in the assign latin alphabet.
1770 0     0 1 0 {my ($in) = @_; # A string of ascii
1771 1     1   19776 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧/r;
  1         3  
  1         20  
  0         0  
1772             }
1773              
1774             sub asciiToAssignGreek($) # Translate ascii to the corresponding letters in the assign greek alphabet.
1775 0     0 1 0 {my ($in) = @_; # A string of ascii
1776 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔/r;
1777             }
1778              
1779             sub asciiToDyadLatin($) # Translate ascii to the corresponding letters in the dyad latin alphabet.
1780 0     0 1 0 {my ($in) = @_; # A string of ascii
1781 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳/r;
1782             }
1783              
1784             sub asciiToDyadGreek($) # Translate ascii to the corresponding letters in the dyad greek alphabet.
1785 0     0 1 0 {my ($in) = @_; # A string of ascii
1786 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚/r;
1787             }
1788              
1789             sub asciiToPrefixLatin($) # Translate ascii to the corresponding letters in the prefix latin alphabet.
1790 0     0 1 0 {my ($in) = @_; # A string of ascii
1791 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛/r;
1792             }
1793              
1794             sub asciiToPrefixGreek($) # Translate ascii to the corresponding letters in the prefix greek alphabet.
1795 0     0 1 0 {my ($in) = @_; # A string of ascii
1796 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎/r;
1797             }
1798              
1799             sub asciiToSuffixLatin($) # Translate ascii to the corresponding letters in the suffix latin alphabet.
1800 0     0 1 0 {my ($in) = @_; # A string of ascii
1801 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯/r;
1802             }
1803              
1804             sub asciiToSuffixGreek($) # Translate ascii to the corresponding letters in the suffix greek alphabet.
1805 0     0 1 0 {my ($in) = @_; # A string of ascii
1806 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂/r;
1807             }
1808              
1809             sub asciiToVariableLatin($) # Translate ascii to the corresponding letters in the variable latin alphabet.
1810 0     0 1 0 {my ($in) = @_; # A string of ascii
1811 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇/r;
1812             }
1813              
1814             sub asciiToVariableGreek($) # Translate ascii to the corresponding letters in the variable greek alphabet.
1815 0     0 1 0 {my ($in) = @_; # A string of ascii
1816 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈/r;
1817             }
1818              
1819             sub asciiToEscaped($) # Translate ascii to the corresponding letters in the escaped ascii alphabet.
1820 0     0 1 0 {my ($in) = @_; # A string of ascii
1821 0         0 $in =~ tr/abcdefghijklmnopqrstuvwxyz/🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩/r;
1822             }
1823              
1824             sub semiColonChar() # The semicolon character: chr(10210).
1825             {chr(10210)
1826             }
1827              
1828             #d
1829 1     1 0 3 sub lexicalData {do {
1830 1         342 my $a = bless({
1831             alphabetRanges => 14,
1832             alphabets => {
1833             "circledLatinLetter" => "\x{24B6}\x{24B7}\x{24B8}\x{24B9}\x{24BA}\x{24BB}\x{24BC}\x{24BD}\x{24BE}\x{24BF}\x{24C0}\x{24C1}\x{24C2}\x{24C3}\x{24C4}\x{24C5}\x{24C6}\x{24C7}\x{24C8}\x{24C9}\x{24CA}\x{24CB}\x{24CC}\x{24CD}\x{24CE}\x{24CF}\x{24D0}\x{24D1}\x{24D2}\x{24D3}\x{24D4}\x{24D5}\x{24D6}\x{24D7}\x{24D8}\x{24D9}\x{24DA}\x{24DB}\x{24DC}\x{24DD}\x{24DE}\x{24DF}\x{24E0}\x{24E1}\x{24E2}\x{24E3}\x{24E4}\x{24E5}\x{24E6}\x{24E7}\x{24E8}\x{24E9}",
1834             "mathematicalBold" => "\x{1D400}\x{1D401}\x{1D402}\x{1D403}\x{1D404}\x{1D405}\x{1D406}\x{1D407}\x{1D408}\x{1D409}\x{1D40A}\x{1D40B}\x{1D40C}\x{1D40D}\x{1D40E}\x{1D40F}\x{1D410}\x{1D411}\x{1D412}\x{1D413}\x{1D414}\x{1D415}\x{1D416}\x{1D417}\x{1D418}\x{1D419}\x{1D41A}\x{1D41B}\x{1D41C}\x{1D41D}\x{1D41E}\x{1D41F}\x{1D420}\x{1D421}\x{1D422}\x{1D423}\x{1D424}\x{1D425}\x{1D426}\x{1D427}\x{1D428}\x{1D429}\x{1D42A}\x{1D42B}\x{1D42C}\x{1D42D}\x{1D42E}\x{1D42F}\x{1D430}\x{1D431}\x{1D432}\x{1D433}\x{1D6A8}\x{1D6A9}\x{1D6AA}\x{1D6AB}\x{1D6AC}\x{1D6AD}\x{1D6AE}\x{1D6AF}\x{1D6B0}\x{1D6B1}\x{1D6B2}\x{1D6B3}\x{1D6B4}\x{1D6B5}\x{1D6B6}\x{1D6B7}\x{1D6B8}\x{1D6B9}\x{1D6BA}\x{1D6BB}\x{1D6BC}\x{1D6BD}\x{1D6BE}\x{1D6BF}\x{1D6C0}\x{1D6C1}\x{1D6C2}\x{1D6C3}\x{1D6C4}\x{1D6C5}\x{1D6C6}\x{1D6C7}\x{1D6C8}\x{1D6C9}\x{1D6CA}\x{1D6CB}\x{1D6CC}\x{1D6CD}\x{1D6CE}\x{1D6CF}\x{1D6D0}\x{1D6D1}\x{1D6D2}\x{1D6D3}\x{1D6D4}\x{1D6D5}\x{1D6D6}\x{1D6D7}\x{1D6D8}\x{1D6D9}\x{1D6DA}\x{1D6DB}\x{1D6DC}\x{1D6DD}\x{1D6DE}\x{1D6DF}\x{1D6E0}\x{1D6E1}",
1835             "mathematicalBoldFraktur" => "\x{1D56C}\x{1D56D}\x{1D56E}\x{1D56F}\x{1D570}\x{1D571}\x{1D572}\x{1D573}\x{1D574}\x{1D575}\x{1D576}\x{1D577}\x{1D578}\x{1D579}\x{1D57A}\x{1D57B}\x{1D57C}\x{1D57D}\x{1D57E}\x{1D57F}\x{1D580}\x{1D581}\x{1D582}\x{1D583}\x{1D584}\x{1D585}\x{1D586}\x{1D587}\x{1D588}\x{1D589}\x{1D58A}\x{1D58B}\x{1D58C}\x{1D58D}\x{1D58E}\x{1D58F}\x{1D590}\x{1D591}\x{1D592}\x{1D593}\x{1D594}\x{1D595}\x{1D596}\x{1D597}\x{1D598}\x{1D599}\x{1D59A}\x{1D59B}\x{1D59C}\x{1D59D}\x{1D59E}\x{1D59F}",
1836             "mathematicalBoldItalic" => "\x{1D468}\x{1D469}\x{1D46A}\x{1D46B}\x{1D46C}\x{1D46D}\x{1D46E}\x{1D46F}\x{1D470}\x{1D471}\x{1D472}\x{1D473}\x{1D474}\x{1D475}\x{1D476}\x{1D477}\x{1D478}\x{1D479}\x{1D47A}\x{1D47B}\x{1D47C}\x{1D47D}\x{1D47E}\x{1D47F}\x{1D480}\x{1D481}\x{1D482}\x{1D483}\x{1D484}\x{1D485}\x{1D486}\x{1D487}\x{1D488}\x{1D489}\x{1D48A}\x{1D48B}\x{1D48C}\x{1D48D}\x{1D48E}\x{1D48F}\x{1D490}\x{1D491}\x{1D492}\x{1D493}\x{1D494}\x{1D495}\x{1D496}\x{1D497}\x{1D498}\x{1D499}\x{1D49A}\x{1D49B}\x{1D71C}\x{1D71D}\x{1D71E}\x{1D71F}\x{1D720}\x{1D721}\x{1D722}\x{1D723}\x{1D724}\x{1D725}\x{1D726}\x{1D727}\x{1D728}\x{1D729}\x{1D72A}\x{1D72B}\x{1D72C}\x{1D72D}\x{1D72E}\x{1D72F}\x{1D730}\x{1D731}\x{1D732}\x{1D733}\x{1D734}\x{1D735}\x{1D736}\x{1D737}\x{1D738}\x{1D739}\x{1D73A}\x{1D73B}\x{1D73C}\x{1D73D}\x{1D73E}\x{1D73F}\x{1D740}\x{1D741}\x{1D742}\x{1D743}\x{1D744}\x{1D745}\x{1D746}\x{1D747}\x{1D748}\x{1D749}\x{1D74A}\x{1D74B}\x{1D74C}\x{1D74D}\x{1D74E}\x{1D74F}\x{1D750}\x{1D751}\x{1D752}\x{1D753}\x{1D754}\x{1D755}",
1837             "mathematicalBoldScript" => "\x{1D4D0}\x{1D4D1}\x{1D4D2}\x{1D4D3}\x{1D4D4}\x{1D4D5}\x{1D4D6}\x{1D4D7}\x{1D4D8}\x{1D4D9}\x{1D4DA}\x{1D4DB}\x{1D4DC}\x{1D4DD}\x{1D4DE}\x{1D4DF}\x{1D4E0}\x{1D4E1}\x{1D4E2}\x{1D4E3}\x{1D4E4}\x{1D4E5}\x{1D4E6}\x{1D4E7}\x{1D4E8}\x{1D4E9}\x{1D4EA}\x{1D4EB}\x{1D4EC}\x{1D4ED}\x{1D4EE}\x{1D4EF}\x{1D4F0}\x{1D4F1}\x{1D4F2}\x{1D4F3}\x{1D4F4}\x{1D4F5}\x{1D4F6}\x{1D4F7}\x{1D4F8}\x{1D4F9}\x{1D4FA}\x{1D4FB}\x{1D4FC}\x{1D4FD}\x{1D4FE}\x{1D4FF}\x{1D500}\x{1D501}\x{1D502}\x{1D503}",
1838             "mathematicalDouble-struck" => "\x{1D538}\x{1D539}\x{1D53B}\x{1D53C}\x{1D53D}\x{1D53E}\x{1D540}\x{1D541}\x{1D542}\x{1D543}\x{1D544}\x{1D546}\x{1D54A}\x{1D54B}\x{1D54C}\x{1D54D}\x{1D54E}\x{1D54F}\x{1D550}\x{1D552}\x{1D553}\x{1D554}\x{1D555}\x{1D556}\x{1D557}\x{1D558}\x{1D559}\x{1D55A}\x{1D55B}\x{1D55C}\x{1D55D}\x{1D55E}\x{1D55F}\x{1D560}\x{1D561}\x{1D562}\x{1D563}\x{1D564}\x{1D565}\x{1D566}\x{1D567}\x{1D568}\x{1D569}\x{1D56A}\x{1D56B}",
1839             "mathematicalFraktur" => "\x{1D504}\x{1D505}\x{1D507}\x{1D508}\x{1D509}\x{1D50A}\x{1D50D}\x{1D50E}\x{1D50F}\x{1D510}\x{1D511}\x{1D512}\x{1D513}\x{1D514}\x{1D516}\x{1D517}\x{1D518}\x{1D519}\x{1D51A}\x{1D51B}\x{1D51C}\x{1D51E}\x{1D51F}\x{1D520}\x{1D521}\x{1D522}\x{1D523}\x{1D524}\x{1D525}\x{1D526}\x{1D527}\x{1D528}\x{1D529}\x{1D52A}\x{1D52B}\x{1D52C}\x{1D52D}\x{1D52E}\x{1D52F}\x{1D530}\x{1D531}\x{1D532}\x{1D533}\x{1D534}\x{1D535}\x{1D536}\x{1D537}",
1840             "mathematicalItalic" => "\x{1D434}\x{1D435}\x{1D436}\x{1D437}\x{1D438}\x{1D439}\x{1D43A}\x{1D43B}\x{1D43C}\x{1D43D}\x{1D43E}\x{1D43F}\x{1D440}\x{1D441}\x{1D442}\x{1D443}\x{1D444}\x{1D445}\x{1D446}\x{1D447}\x{1D448}\x{1D449}\x{1D44A}\x{1D44B}\x{1D44C}\x{1D44D}\x{1D44E}\x{1D44F}\x{1D450}\x{1D451}\x{1D452}\x{1D453}\x{1D454}\x{1D456}\x{1D457}\x{1D458}\x{1D459}\x{1D45A}\x{1D45B}\x{1D45C}\x{1D45D}\x{1D45E}\x{1D45F}\x{1D460}\x{1D461}\x{1D462}\x{1D463}\x{1D464}\x{1D465}\x{1D466}\x{1D467}\x{1D6E2}\x{1D6E3}\x{1D6E4}\x{1D6E5}\x{1D6E6}\x{1D6E7}\x{1D6E8}\x{1D6E9}\x{1D6EA}\x{1D6EB}\x{1D6EC}\x{1D6ED}\x{1D6EE}\x{1D6EF}\x{1D6F0}\x{1D6F1}\x{1D6F2}\x{1D6F3}\x{1D6F4}\x{1D6F5}\x{1D6F6}\x{1D6F7}\x{1D6F8}\x{1D6F9}\x{1D6FA}\x{1D6FB}\x{1D6FC}\x{1D6FD}\x{1D6FE}\x{1D6FF}\x{1D700}\x{1D701}\x{1D702}\x{1D703}\x{1D704}\x{1D705}\x{1D706}\x{1D707}\x{1D708}\x{1D709}\x{1D70A}\x{1D70B}\x{1D70C}\x{1D70D}\x{1D70E}\x{1D70F}\x{1D710}\x{1D711}\x{1D712}\x{1D713}\x{1D714}\x{1D715}\x{1D716}\x{1D717}\x{1D718}\x{1D719}\x{1D71A}\x{1D71B}",
1841             "mathematicalMonospace" => "\x{1D670}\x{1D671}\x{1D672}\x{1D673}\x{1D674}\x{1D675}\x{1D676}\x{1D677}\x{1D678}\x{1D679}\x{1D67A}\x{1D67B}\x{1D67C}\x{1D67D}\x{1D67E}\x{1D67F}\x{1D680}\x{1D681}\x{1D682}\x{1D683}\x{1D684}\x{1D685}\x{1D686}\x{1D687}\x{1D688}\x{1D689}\x{1D68A}\x{1D68B}\x{1D68C}\x{1D68D}\x{1D68E}\x{1D68F}\x{1D690}\x{1D691}\x{1D692}\x{1D693}\x{1D694}\x{1D695}\x{1D696}\x{1D697}\x{1D698}\x{1D699}\x{1D69A}\x{1D69B}\x{1D69C}\x{1D69D}\x{1D69E}\x{1D69F}\x{1D6A0}\x{1D6A1}\x{1D6A2}\x{1D6A3}",
1842             "mathematicalSans-serif" => "\x{1D5A0}\x{1D5A1}\x{1D5A2}\x{1D5A3}\x{1D5A4}\x{1D5A5}\x{1D5A6}\x{1D5A7}\x{1D5A8}\x{1D5A9}\x{1D5AA}\x{1D5AB}\x{1D5AC}\x{1D5AD}\x{1D5AE}\x{1D5AF}\x{1D5B0}\x{1D5B1}\x{1D5B2}\x{1D5B3}\x{1D5B4}\x{1D5B5}\x{1D5B6}\x{1D5B7}\x{1D5B8}\x{1D5B9}\x{1D5BA}\x{1D5BB}\x{1D5BC}\x{1D5BD}\x{1D5BE}\x{1D5BF}\x{1D5C0}\x{1D5C1}\x{1D5C2}\x{1D5C3}\x{1D5C4}\x{1D5C5}\x{1D5C6}\x{1D5C7}\x{1D5C8}\x{1D5C9}\x{1D5CA}\x{1D5CB}\x{1D5CC}\x{1D5CD}\x{1D5CE}\x{1D5CF}\x{1D5D0}\x{1D5D1}\x{1D5D2}\x{1D5D3}",
1843             "mathematicalSans-serifBold" => "\x{1D5D4}\x{1D5D5}\x{1D5D6}\x{1D5D7}\x{1D5D8}\x{1D5D9}\x{1D5DA}\x{1D5DB}\x{1D5DC}\x{1D5DD}\x{1D5DE}\x{1D5DF}\x{1D5E0}\x{1D5E1}\x{1D5E2}\x{1D5E3}\x{1D5E4}\x{1D5E5}\x{1D5E6}\x{1D5E7}\x{1D5E8}\x{1D5E9}\x{1D5EA}\x{1D5EB}\x{1D5EC}\x{1D5ED}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{1D5F1}\x{1D5F2}\x{1D5F3}\x{1D5F4}\x{1D5F5}\x{1D5F6}\x{1D5F7}\x{1D5F8}\x{1D5F9}\x{1D5FA}\x{1D5FB}\x{1D5FC}\x{1D5FD}\x{1D5FE}\x{1D5FF}\x{1D600}\x{1D601}\x{1D602}\x{1D603}\x{1D604}\x{1D605}\x{1D606}\x{1D607}\x{1D756}\x{1D757}\x{1D758}\x{1D759}\x{1D75A}\x{1D75B}\x{1D75C}\x{1D75D}\x{1D75E}\x{1D75F}\x{1D760}\x{1D761}\x{1D762}\x{1D763}\x{1D764}\x{1D765}\x{1D766}\x{1D767}\x{1D768}\x{1D769}\x{1D76A}\x{1D76B}\x{1D76C}\x{1D76D}\x{1D76E}\x{1D76F}\x{1D770}\x{1D771}\x{1D772}\x{1D773}\x{1D774}\x{1D775}\x{1D776}\x{1D777}\x{1D778}\x{1D779}\x{1D77A}\x{1D77B}\x{1D77C}\x{1D77D}\x{1D77E}\x{1D77F}\x{1D780}\x{1D781}\x{1D782}\x{1D783}\x{1D784}\x{1D785}\x{1D786}\x{1D787}\x{1D788}\x{1D789}\x{1D78A}\x{1D78B}\x{1D78C}\x{1D78D}\x{1D78E}\x{1D78F}",
1844             "mathematicalSans-serifBoldItalic" => "\x{1D63C}\x{1D63D}\x{1D63E}\x{1D63F}\x{1D640}\x{1D641}\x{1D642}\x{1D643}\x{1D644}\x{1D645}\x{1D646}\x{1D647}\x{1D648}\x{1D649}\x{1D64A}\x{1D64B}\x{1D64C}\x{1D64D}\x{1D64E}\x{1D64F}\x{1D650}\x{1D651}\x{1D652}\x{1D653}\x{1D654}\x{1D655}\x{1D656}\x{1D657}\x{1D658}\x{1D659}\x{1D65A}\x{1D65B}\x{1D65C}\x{1D65D}\x{1D65E}\x{1D65F}\x{1D660}\x{1D661}\x{1D662}\x{1D663}\x{1D664}\x{1D665}\x{1D666}\x{1D667}\x{1D668}\x{1D669}\x{1D66A}\x{1D66B}\x{1D66C}\x{1D66D}\x{1D66E}\x{1D66F}\x{1D790}\x{1D791}\x{1D792}\x{1D793}\x{1D794}\x{1D795}\x{1D796}\x{1D797}\x{1D798}\x{1D799}\x{1D79A}\x{1D79B}\x{1D79C}\x{1D79D}\x{1D79E}\x{1D79F}\x{1D7A0}\x{1D7A1}\x{1D7A2}\x{1D7A3}\x{1D7A4}\x{1D7A5}\x{1D7A6}\x{1D7A7}\x{1D7A8}\x{1D7A9}\x{1D7AA}\x{1D7AB}\x{1D7AC}\x{1D7AD}\x{1D7AE}\x{1D7AF}\x{1D7B0}\x{1D7B1}\x{1D7B2}\x{1D7B3}\x{1D7B4}\x{1D7B5}\x{1D7B6}\x{1D7B7}\x{1D7B8}\x{1D7B9}\x{1D7BA}\x{1D7BB}\x{1D7BC}\x{1D7BD}\x{1D7BE}\x{1D7BF}\x{1D7C0}\x{1D7C1}\x{1D7C2}\x{1D7C3}\x{1D7C4}\x{1D7C5}\x{1D7C6}\x{1D7C7}\x{1D7C8}\x{1D7C9}",
1845             "mathematicalSans-serifItalic" => "\x{1D608}\x{1D609}\x{1D60A}\x{1D60B}\x{1D60C}\x{1D60D}\x{1D60E}\x{1D60F}\x{1D610}\x{1D611}\x{1D612}\x{1D613}\x{1D614}\x{1D615}\x{1D616}\x{1D617}\x{1D618}\x{1D619}\x{1D61A}\x{1D61B}\x{1D61C}\x{1D61D}\x{1D61E}\x{1D61F}\x{1D620}\x{1D621}\x{1D622}\x{1D623}\x{1D624}\x{1D625}\x{1D626}\x{1D627}\x{1D628}\x{1D629}\x{1D62A}\x{1D62B}\x{1D62C}\x{1D62D}\x{1D62E}\x{1D62F}\x{1D630}\x{1D631}\x{1D632}\x{1D633}\x{1D634}\x{1D635}\x{1D636}\x{1D637}\x{1D638}\x{1D639}\x{1D63A}\x{1D63B}",
1846             "mathematicalScript" => "\x{1D49C}\x{1D49E}\x{1D49F}\x{1D4A2}\x{1D4A5}\x{1D4A6}\x{1D4A9}\x{1D4AA}\x{1D4AB}\x{1D4AC}\x{1D4AE}\x{1D4AF}\x{1D4B0}\x{1D4B1}\x{1D4B2}\x{1D4B3}\x{1D4B4}\x{1D4B5}\x{1D4B6}\x{1D4B7}\x{1D4B8}\x{1D4B9}\x{1D4BB}\x{1D4BD}\x{1D4BE}\x{1D4BF}\x{1D4C0}\x{1D4C1}\x{1D4C2}\x{1D4C3}\x{1D4C5}\x{1D4C6}\x{1D4C7}\x{1D4C8}\x{1D4C9}\x{1D4CA}\x{1D4CB}\x{1D4CC}\x{1D4CD}\x{1D4CE}\x{1D4CF}",
1847             "negativeCircledLatinLetter" => "\x{1F150}\x{1F151}\x{1F152}\x{1F153}\x{1F154}\x{1F155}\x{1F156}\x{1F157}\x{1F158}\x{1F159}\x{1F15A}\x{1F15B}\x{1F15C}\x{1F15D}\x{1F15E}\x{1F15F}\x{1F160}\x{1F161}\x{1F162}\x{1F163}\x{1F164}\x{1F165}\x{1F166}\x{1F167}\x{1F168}\x{1F169}",
1848             "negativeSquaredLatinLetter" => "\x{1F170}\x{1F171}\x{1F172}\x{1F173}\x{1F174}\x{1F175}\x{1F176}\x{1F177}\x{1F178}\x{1F179}\x{1F17A}\x{1F17B}\x{1F17C}\x{1F17D}\x{1F17E}\x{1F17F}\x{1F180}\x{1F181}\x{1F182}\x{1F183}\x{1F184}\x{1F185}\x{1F186}\x{1F187}\x{1F188}\x{1F189}",
1849             "planck" => "\x{210E}",
1850             "semiColon" => "\x{27E2}",
1851             "squaredLatinLetter" => "\x{1F130}\x{1F131}\x{1F132}\x{1F133}\x{1F134}\x{1F135}\x{1F136}\x{1F137}\x{1F138}\x{1F139}\x{1F13A}\x{1F13B}\x{1F13C}\x{1F13D}\x{1F13E}\x{1F13F}\x{1F140}\x{1F141}\x{1F142}\x{1F143}\x{1F144}\x{1F145}\x{1F146}\x{1F147}\x{1F148}\x{1F149}\x{1F1A5}",
1852             },
1853             alphabetsOrdered => {
1854             Ascii => [0 .. 127, 127312 .. 127337],
1855             assign => [8462, 119860 .. 119911, 120546 .. 120603],
1856             dyad => [119808 .. 119859, 120488 .. 120545],
1857             prefix => [119912 .. 119963, 120604 .. 120661],
1858             semiColon => [10210],
1859             suffix => [120380 .. 120431, 120720 .. 120777],
1860             variable => [120276 .. 120327, 120662 .. 120719],
1861             },
1862             brackets => 16,
1863             bracketsBase => 16,
1864             bracketsClose => [
1865             "\x{2309}",
1866             "\x{230B}",
1867             "\x{232A}",
1868             "\x{2769}",
1869             "\x{276B}",
1870             "\x{276D}",
1871             "\x{276F}",
1872             "\x{2771}",
1873             "\x{2773}",
1874             "\x{2775}",
1875             "\x{27E7}",
1876             "\x{27E9}",
1877             "\x{27EB}",
1878             "\x{27ED}",
1879             "\x{27EF}",
1880             "\x{2984}",
1881             "\x{2986}",
1882             "\x{2988}",
1883             "\x{298A}",
1884             "\x{298C}",
1885             "\x{298E}",
1886             "\x{2990}",
1887             "\x{2992}",
1888             "\x{2994}",
1889             "\x{2996}",
1890             "\x{2998}",
1891             "\x{29FD}",
1892             "\x{2E29}",
1893             "\x{3009}",
1894             "\x{300B}",
1895             "\x{3011}",
1896             "\x{3015}",
1897             "\x{3017}",
1898             "\x{3019}",
1899             "\x{301B}",
1900             "\x{FD3F}",
1901             "\x{FF09}",
1902             "\x{FF60}",
1903             ],
1904             bracketsHigh => [
1905             "0x1300230b",
1906             "0x1500232a",
1907             "0x23002775",
1908             "0x2d0027ef",
1909             "0x43002998",
1910             "0x450029fd",
1911             "0x47002e29",
1912             "0x4b00300b",
1913             "0x4d003011",
1914             "0x5500301b",
1915             "0x5700fd3f",
1916             "0x5900ff09",
1917             "0x5b00ff60",
1918             0,
1919             0,
1920             0,
1921             ],
1922             bracketsLow => [
1923             "0x10002308",
1924             "0x14002329",
1925             "0x16002768",
1926             "0x240027e6",
1927             "0x2e002983",
1928             "0x440029fc",
1929             "0x46002e28",
1930             "0x48003008",
1931             "0x4c003010",
1932             "0x4e003014",
1933             "0x5600fd3e",
1934             "0x5800ff08",
1935             "0x5a00ff5f",
1936             0,
1937             0,
1938             0,
1939             ],
1940             bracketsOpen => [
1941             "\x{2308}",
1942             "\x{230A}",
1943             "\x{2329}",
1944             "\x{2768}",
1945             "\x{276A}",
1946             "\x{276C}",
1947             "\x{276E}",
1948             "\x{2770}",
1949             "\x{2772}",
1950             "\x{2774}",
1951             "\x{27E6}",
1952             "\x{27E8}",
1953             "\x{27EA}",
1954             "\x{27EC}",
1955             "\x{27EE}",
1956             "\x{2983}",
1957             "\x{2985}",
1958             "\x{2987}",
1959             "\x{2989}",
1960             "\x{298B}",
1961             "\x{298D}",
1962             "\x{298F}",
1963             "\x{2991}",
1964             "\x{2993}",
1965             "\x{2995}",
1966             "\x{2997}",
1967             "\x{29FC}",
1968             "\x{2E28}",
1969             "\x{3008}",
1970             "\x{300A}",
1971             "\x{3010}",
1972             "\x{3014}",
1973             "\x{3016}",
1974             "\x{3018}",
1975             "\x{301A}",
1976             "\x{FD3E}",
1977             "\x{FF08}",
1978             "\x{FF5F}",
1979             ],
1980             lexicalAlpha => {
1981             "" => [
1982             "circledLatinLetter",
1983             "mathematicalBoldFraktur",
1984             "mathematicalBoldScript",
1985             "mathematicalDouble-struck",
1986             "mathematicalFraktur",
1987             "mathematicalMonospace",
1988             "mathematicalSans-serif",
1989             "mathematicalSans-serifItalic",
1990             "mathematicalScript",
1991             "negativeSquaredLatinLetter",
1992             "semiColon",
1993             "squaredLatinLetter",
1994             ],
1995             "Ascii" => ["negativeCircledLatinLetter"],
1996             "assign" => ["mathematicalItalic", "planck"],
1997             "CloseBracket" => [],
1998             "dyad" => ["mathematicalBold"],
1999             "OpenBracket" => [],
2000             "prefix" => ["mathematicalBoldItalic"],
2001             "semiColon" => [],
2002             "suffix" => ["mathematicalSans-serifBoldItalic"],
2003             "term" => [],
2004             "variable" => ["mathematicalSans-serifBold"],
2005             },
2006             lexicalHigh => [
2007             127,
2008             8462,
2009             10210,
2010             119859,
2011             16897127,
2012             119963,
2013             120327,
2014             120431,
2015             872535777,
2016             889313051,
2017             872535893,
2018             872535951,
2019             872536009,
2020             2147610985,
2021             0,
2022             0,
2023             ],
2024             lexicalLow => [
2025             33554432,
2026             83894542,
2027             134227938,
2028             50451456,
2029             84005940,
2030             67228776,
2031             100783572,
2032             117560892,
2033             50452136,
2034             84006626,
2035             67229468,
2036             100783958,
2037             117561232,
2038             33681744,
2039             0,
2040             0,
2041             ],
2042             lexicals => bless({
2043             Ascii => bless({ letter => "a", like => "v", name => "Ascii", number => 2 }, "Unisyn::Parse::Lexical::Constant"),
2044             assign => bless({ letter => "a", like => "a", name => "assign", number => 5 }, "Unisyn::Parse::Lexical::Constant"),
2045             CloseBracket => bless({ letter => "B", like => "B", name => "CloseBracket", number => 1 }, "Unisyn::Parse::Lexical::Constant"),
2046             dyad => bless({ letter => "d", like => "d", name => "dyad", number => 3 }, "Unisyn::Parse::Lexical::Constant"),
2047             empty => bless({ letter => "e", like => "e", name => "empty", number => 10 }, "Unisyn::Parse::Lexical::Constant"),
2048             NewLineSemiColon => bless({ letter => "N", like => undef, name => "NewLineSemiColon", number => 12 }, "Unisyn::Parse::Lexical::Constant"),
2049             OpenBracket => bless({ letter => "b", like => "b", name => "OpenBracket", number => 0 }, "Unisyn::Parse::Lexical::Constant"),
2050             prefix => bless({ letter => "p", like => "p", name => "prefix", number => 4 }, "Unisyn::Parse::Lexical::Constant"),
2051             semiColon => bless({ letter => "s", like => "s", name => "semiColon", number => 8 }, "Unisyn::Parse::Lexical::Constant"),
2052             suffix => bless({ letter => "q", like => "q", name => "suffix", number => 7 }, "Unisyn::Parse::Lexical::Constant"),
2053             term => bless({ letter => "t", like => "t", name => "term", number => 9 }, "Unisyn::Parse::Lexical::Constant"),
2054             variable => bless({ letter => "v", like => "v", name => "variable", number => 6 }, "Unisyn::Parse::Lexical::Constant"),
2055             WhiteSpace => bless({ letter => "W", like => undef, name => "WhiteSpace", number => 11 }, "Unisyn::Parse::Lexical::Constant"),
2056             }, "Unisyn::Parse::Lexicals"),
2057             sampleLexicals => {
2058             A => [
2059             100663296,
2060             83886080,
2061             33554497,
2062             33554464,
2063             33554497,
2064             33554464,
2065             33554464,
2066             33554464,
2067             33554464,
2068             ],
2069             Adv => [
2070             100663296,
2071             83886080,
2072             33554497,
2073             33554464,
2074             33554497,
2075             33554464,
2076             33554464,
2077             33554464,
2078             33554464,
2079             50331648,
2080             100663296,
2081             ],
2082             BB => [
2083             0,
2084             0,
2085             0,
2086             0,
2087             0,
2088             0,
2089             0,
2090             0,
2091             100663296,
2092             16777216,
2093             16777216,
2094             16777216,
2095             16777216,
2096             16777216,
2097             16777216,
2098             16777216,
2099             16777216,
2100             ],
2101             brackets => [
2102             100663296,
2103             83886080,
2104             0,
2105             0,
2106             0,
2107             100663296,
2108             16777216,
2109             16777216,
2110             50331648,
2111             0,
2112             100663296,
2113             16777216,
2114             16777216,
2115             134217728,
2116             ],
2117             bvB => [0, 100663296, 16777216],
2118             nosemi => [
2119             100663296,
2120             83886080,
2121             0,
2122             0,
2123             0,
2124             100663296,
2125             16777216,
2126             16777216,
2127             50331648,
2128             0,
2129             100663296,
2130             16777216,
2131             16777216,
2132             ],
2133             ppppvdvdvqqqq => [
2134             0,
2135             0,
2136             0,
2137             100663296,
2138             83886080,
2139             100663296,
2140             50331648,
2141             0,
2142             100663296,
2143             50331648,
2144             100663296,
2145             16777216,
2146             134217728,
2147             100663296,
2148             83886080,
2149             100663296,
2150             50331648,
2151             100663296,
2152             16777216,
2153             16777216,
2154             16777216,
2155             ],
2156             s => [100663296, 134217728, 100663296],
2157             s1 => [
2158             100663296,
2159             83886080,
2160             33554442,
2161             33554464,
2162             33554464,
2163             33554497,
2164             33554442,
2165             33554464,
2166             33554464,
2167             33554464,
2168             ],
2169             v => [100663296],
2170             vav => [100663296, 83886080, 100663296],
2171             vavav => [100663296, 83886080, 100663296, 83886080, 100663296],
2172             vnsvs => [
2173             100663296,
2174             33554442,
2175             33554464,
2176             33554464,
2177             33554464,
2178             100663296,
2179             33554464,
2180             33554464,
2181             33554464,
2182             ],
2183             vnv => [100663296, 33554442, 100663296],
2184             vnvs => [
2185             100663296,
2186             33554442,
2187             100663296,
2188             33554464,
2189             33554464,
2190             33554464,
2191             33554464,
2192             ],
2193             ws => [
2194             100663296,
2195             83886080,
2196             0,
2197             0,
2198             0,
2199             100663296,
2200             16777216,
2201             16777216,
2202             50331648,
2203             0,
2204             100663296,
2205             16777216,
2206             16777216,
2207             134217728,
2208             100663296,
2209             83886080,
2210             0,
2211             100663296,
2212             50331648,
2213             100663296,
2214             16777216,
2215             134217728,
2216             ],
2217             wsa => [
2218             100663296,
2219             83886080,
2220             0,
2221             0,
2222             0,
2223             100663296,
2224             16777216,
2225             16777216,
2226             50331648,
2227             0,
2228             100663296,
2229             16777216,
2230             16777216,
2231             134217728,
2232             100663296,
2233             83886080,
2234             33554497,
2235             50331648,
2236             100663296,
2237             134217728,
2238             ],
2239             },
2240             sampleText => {
2241             A => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 ",
2242             Adv => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 \x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D603}\x{1D5EE}\x{1D5FF}",
2243             BB => "\x{230A}\x{2329}\x{2768}\x{276A}\x{276C}\x{276E}\x{2770}\x{2772}\x{1D5EE}\x{2773}\x{2771}\x{276F}\x{276D}\x{276B}\x{2769}\x{232A}\x{230B}",
2244             brackets => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}",
2245             bvB => "\x{2329}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{232A}",
2246             nosemi => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}",
2247             ppppvdvdvqqqq => "\x{1D482}\x{2774}\x{1D483}\x{27E6}\x{1D484}\x{27E8}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}\x{1D485}\x{1D5EF}\x{1D659}\x{1D42D}\x{1D422}\x{1D426}\x{1D41E}\x{1D42C}\x{27EA}\x{1D5F0}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F1}\x{27EB}\x{27E2}\x{1D5F2}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{1D5F3}\x{1D42C}\x{1D42E}\x{1D41B}\x{1D5F4}\x{1D65D}\x{27E9}\x{1D658}\x{27E7}\x{1D657}\x{2775}\x{1D656}",
2248             s => "\x{1D5EE}\x{27E2}\x{1D5EF}",
2249             s1 => "\x{1D5EE}\x{1D44E}\n \n ",
2250             v => "\x{1D5EE}",
2251             vav => "\x{1D5EE}\x{1D44E}\x{1D5EF}",
2252             vavav => "\x{1D5EE}\x{1D44E}\x{1D5EF}\x{1D44E}\x{1D5F0}",
2253             vnsvs => "\x{1D5EE}\x{1D5EE}\n \x{1D5EF}\x{1D5EF} ",
2254             vnv => "\x{1D5EE}\n\x{1D5EF}",
2255             vnvs => "\x{1D5EE}\n\x{1D5EF} ",
2256             ws => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{276C}\x{1D5EF}\x{1D5EF}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{276D}\x{27E2}",
2257             wsa => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}some--ascii--text\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{27E2}",
2258             },
2259             semiColon => "\x{27E2}",
2260             separator => "\x{205F}",
2261             structure => bless({
2262             codes => bless({
2263             a => bless({
2264             letter => "a",
2265             name => "assignment operator",
2266             next => "bpv",
2267             short => "assign",
2268             }, "Tree::Term::LexicalCode"),
2269             b => bless({
2270             letter => "b",
2271             name => "opening parenthesis",
2272             next => "bBpsv",
2273             short => "OpenBracket",
2274             }, "Tree::Term::LexicalCode"),
2275             B => bless({
2276             letter => "B",
2277             name => "closing parenthesis",
2278             next => "aBdqs",
2279             short => "CloseBracket",
2280             }, "Tree::Term::LexicalCode"),
2281             d => bless({ letter => "d", name => "dyadic operator", next => "bpv", short => "dyad" }, "Tree::Term::LexicalCode"),
2282             p => bless({ letter => "p", name => "prefix operator", next => "bpv", short => "prefix" }, "Tree::Term::LexicalCode"),
2283             q => bless({
2284             letter => "q",
2285             name => "suffix operator",
2286             next => "aBdqs",
2287             short => "suffix",
2288             }, "Tree::Term::LexicalCode"),
2289             s => bless({ letter => "s", name => "semi-colon", next => "bBpsv", short => "semiColon" }, "Tree::Term::LexicalCode"),
2290             t => bless({ letter => "t", name => "term", next => "aBdqs", short => "term" }, "Tree::Term::LexicalCode"),
2291             v => bless({ letter => "v", name => "variable", next => "aBdqs", short => "variable" }, "Tree::Term::LexicalCode"),
2292             }, "Tree::Term::Codes"),
2293             first => "bpsv",
2294             last => "Bqsv",
2295             }, "Tree::Term::LexicalStructure"),
2296             treeTermLexicals => 'fix',
2297             }, "Unisyn::Parse::Lexical::Tables");
2298 1         7 $a->{treeTermLexicals} = $a->{structure}{codes};
2299 1         4 $a;
2300             }}
2301              
2302             #-------------------------------------------------------------------------------
2303             # Export - eeee
2304             #-------------------------------------------------------------------------------
2305              
2306 1     1   12833 use Exporter qw(import);
  1         3  
  1         46  
2307              
2308 1     1   6 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         3  
  1         572  
2309              
2310             @ISA = qw(Exporter); # Export configuration: inherit from Exporter
2311             @EXPORT = qw(); # Nothing is exported by default
2312             @EXPORT_OK = qw(); # Nothing is exportable on request either
2313             %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); # The :all tag is the concatenation of the two lists above, so it is currently empty
2314              
2315             # podDocumentation
2316             =pod
2317              
2318             =encoding utf-8
2319              
2320             =head1 Name
2321              
2322             Unisyn::Parse - Parse a Unisyn expression.
2323              
2324             =head1 Synopsis
2325              
2326             Parse the B<Unisyn> expression:
2327              
2328             𝒂 ❴ 𝒃 ⟦𝒄⟨ 𝗮 𝑒𝑞𝑢𝑎𝑙𝑠 𝒅 𝗯 𝙙 𝐭𝐢𝐦𝐞𝐬 ⟪𝗰 𝐩𝐥𝐮𝐬 𝗱⟫⟢ 𝗲 𝑎𝑠𝑠𝑖𝑔𝑛 𝗳 𝐬𝐮𝐛 𝗴 𝙝⟩ 𝙘 ⟧ 𝙗 ❵ 𝙖
2329              
2330             To get:
2331              
2332             Suffix: 𝙖
2333             Term
2334             Prefix: 𝒂
2335             Term
2336             Brackets: ⦇⦈
2337             Term
2338             Term
2339             Suffix: 𝙗
2340             Term
2341             Prefix: 𝒃
2342             Term
2343             Brackets: ⦋⦌
2344             Term
2345             Term
2346             Suffix: 𝙘
2347             Term
2348             Prefix: 𝒄
2349             Term
2350             Brackets: ⦏⦐
2351             Term
2352             Term
2353             Semicolon
2354             Term
2355             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2356             Term
2357             Variable: 𝗮
2358             Term
2359             Dyad: 𝐭𝐢𝐦𝐞𝐬
2360             Term
2361             Suffix: 𝙙
2362             Term
2363             Prefix: 𝒅
2364             Term
2365             Variable: 𝗯
2366             Term
2367             Brackets: ⦓⦔
2368             Term
2369             Term
2370             Dyad: 𝐩𝐥𝐮𝐬
2371             Term
2372             Variable: 𝗰
2373             Term
2374             Variable: 𝗱
2375             Term
2376             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2377             Term
2378             Variable: 𝗲
2379             Term
2380             Dyad: 𝐬𝐮𝐛
2381             Term
2382             Variable: 𝗳
2383             Term
2384             Suffix: 𝙝
2385             Term
2386             Variable: 𝗴
2387              
2388             Then traverse the parse tree printing the type of each node:
2389              
2390             variable
2391             variable
2392             prefix_d
2393             suffix_d
2394             variable
2395             variable
2396             plus
2397             times
2398             equals
2399             variable
2400             variable
2401             variable
2402             sub
2403             assign
2404             semiColon
2405             brackets_3
2406             prefix_c
2407             suffix_c
2408             brackets_2
2409             prefix_b
2410             suffix_b
2411             brackets_1
2412             prefix_a
2413             suffix_a
2414              
2415             =head1 Description
2416              
2417             Parse a Unisyn expression.
2418              
2419              
2420             Version "20211013".
2421              
2422              
2423             The following sections describe the methods in each functional area of this
2424             module. For an alphabetic listing of all methods by name see L<Index|/Index>.
2425              
2426              
2427              
2428             =head1 Create
2429              
2430             Create a Unisyn parse of a utf8 string.
2431              
2432             =head2 create($address, %options)
2433              
2434             Create a new unisyn parse from a utf8 string.
2435              
2436             Parameter Description
2437             1 $address Variable holding the address of a zero terminated utf8 source string to parse
2438             2 %options Parse options.
2439              
2440             B<Example:>
2441              
2442              
2443              
2444             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2445              
2446              
2447             ok Assemble(debug => 0, eq => <<END);
2448             Assign: 𝑎
2449             Term
2450             Variable: 𝗮
2451             Term
2452             Variable: 𝗯
2453             END
2454              
2455              
2456             =head1 Parse
2457              
2458             Parse Unisyn expressions
2459              
2460             =head1 Traverse
2461              
2462             Traverse the parse tree
2463              
2464             =head2 traverseParseTree($parse)
2465              
2466             Traverse the terms in the parse tree in post order and call the operator subroutine associated with each term.
2467              
2468             Parameter Description
2469             1 $parse Parse tree
2470              
2471             B<Example:>
2472              
2473              
2474             my $s = Rutf8 $Lex->{sampleText}{Adv}; # Ascii
2475             my $p = create K(address, $s), operators => \&printOperatorSequence;
2476              
2477             K(address, $s)->printOutZeroString;
2478             $p->dumpParseTree;
2479             $p->print;
2480              
2481             $p->traverseParseTree; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2482              
2483              
2484             Assemble(debug => 0, eq => <<END);
2485             𝗮𝗮𝑒𝑞𝑢𝑎𝑙𝑠abc 123 𝐩𝐥𝐮𝐬𝘃𝗮𝗿
2486             Tree at: 0000 0000 0000 10D8 length: 0000 0000 0000 000B
2487             Keys: 0000 1118 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2488             Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0F18 0000 0009 0000 0AD8 0000 0009 0000 0004 0000 0006 0000 0002 0000 0005 0041 26A4 0000 0003 0000 0009
2489             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2490             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2491             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2492             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 26A4
2493             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0005
2494             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0002
2495             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0006
2496             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0004
2497             index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2498             index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0AD8 subTree
2499             index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2500             index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0F18 subTree
2501             Tree at: 0000 0000 0000 0AD8 length: 0000 0000 0000 0007
2502             Keys: 0000 0B18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2503             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0000 0000 0006 0041 176C 0000 0001 0000 0009
2504             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2505             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2506             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2507             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 176C
2508             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2509             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0000
2510             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0002
2511             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0000
2512             end
2513             Tree at: 0000 0000 0000 0F18 length: 0000 0000 0000 000B
2514             Keys: 0000 0F58 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2515             Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0DD8 0000 0009 0000 0C18 0000 0009 0000 0003 0000 0004 0000 0013 0000 0003 0041 2E40 0000 0003 0000 0009
2516             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2517             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2518             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2519             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 2E40
2520             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0003
2521             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0013
2522             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0004
2523             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0003
2524             index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2525             index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0C18 subTree
2526             index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2527             index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0DD8 subTree
2528             Tree at: 0000 0000 0000 0C18 length: 0000 0000 0000 0007
2529             Keys: 0000 0C58 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2530             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0001 0000 0007 0000 0008 0000 0002 0041 53FE 0000 0001 0000 0009
2531             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2532             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2533             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2534             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 53FE
2535             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0002
2536             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0008
2537             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0007
2538             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0001
2539             end
2540             Tree at: 0000 0000 0000 0DD8 length: 0000 0000 0000 0007
2541             Keys: 0000 0E18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2542             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0003 0000 0017 0000 0006 0041 176C 0000 0001 0000 0009
2543             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2544             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2545             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2546             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 176C
2547             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2548             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0017
2549             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0003
2550             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0002
2551             end
2552             end
2553             end
2554             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2555             Term
2556             Variable: 𝗮𝗮
2557             Term
2558             Dyad: 𝐩𝐥𝐮𝐬
2559             Term
2560             Ascii: abc 123
2561             Term
2562             Variable: 𝘃𝗮𝗿
2563             variable
2564             ascii
2565             variable
2566             plus
2567             equals
2568             END
2569              
2570             my $s = Rutf8 $Lex->{sampleText}{ws};
2571             my $p = create (K(address, $s), operators => \&printOperatorSequence);
2572              
2573             K(address, $s)->printOutZeroString; # Print input string
2574             $p->print; # Print parse
2575              
2576             $p->traverseParseTree; # Traverse tree printing terms # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2577              
2578              
2579             Assemble(debug => 0, eq => <<END);
2580             𝗮𝑎𝑠𝑠𝑖𝑔𝑛⌊〈❨𝗯𝗽❩〉𝐩𝐥𝐮𝐬❪𝘀𝗰❫⌋⟢𝗮𝗮𝑎𝑠𝑠𝑖𝑔𝑛❬𝗯𝗯𝐩𝐥𝐮𝐬𝗰𝗰❭⟢
2581             Semicolon
2582             Term
2583             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2584             Term
2585             Variable: 𝗮
2586             Term
2587             Brackets: ⌊⌋
2588             Term
2589             Term
2590             Dyad: 𝐩𝐥𝐮𝐬
2591             Term
2592             Brackets: ❨❩
2593             Term
2594             Term
2595             Brackets: ❬❭
2596             Term
2597             Term
2598             Variable: 𝗯𝗽
2599             Term
2600             Brackets: ❰❱
2601             Term
2602             Term
2603             Variable: 𝘀𝗰
2604             Term
2605             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2606             Term
2607             Variable: 𝗮𝗮
2608             Term
2609             Brackets: ❴❵
2610             Term
2611             Term
2612             Dyad: 𝐩𝐥𝐮𝐬
2613             Term
2614             Variable: 𝗯𝗯
2615             Term
2616             Variable: 𝗰𝗰
2617             variable
2618             variable
2619             variable
2620             plus
2621             assign
2622             variable
2623             variable
2624             variable
2625             plus
2626             assign
2627             semiColon
2628             END
2629              
2630              
2631             =head2 makeExecutionChain($parse)
2632              
2633             Traverse the parse tree in post order to create an execution chain.
2634              
2635             Parameter Description
2636             1 $parse Parse tree
2637              
2638             =head1 Print
2639              
2640             Print a parse tree
2641              
2642             =head2 print($parse)
2643              
2644             Print a parse tree.
2645              
2646             Parameter Description
2647             1 $parse Parse tree
2648              
2649             B<Example:>
2650              
2651              
2652              
2653             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2654              
2655              
2656             ok Assemble(debug => 0, eq => <<END);
2657             Assign: 𝑎
2658             Term
2659             Variable: 𝗮
2660             Term
2661             Variable: 𝗯
2662             END
2663              
2664              
2665             =head2 dumpParseTree($parse)
2666              
2667             Dump the parse tree.
2668              
2669             Parameter Description
2670             1 $parse Parse tree
2671              
2672             =head1 Execute
2673              
2674             Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
2675              
2676             =head2 lexToSub($parse, $alphabet, $op, $sub)
2677              
2678             Map a lexical item to a processing subroutine.
2679              
2680             Parameter Description
2681             1 $parse Sub quarks
2682             2 $alphabet The alphabet number
2683             3 $op The operator name in that alphabet
2684             4 $sub Subroutine definition
2685              
2686             =head2 dyad($parse, $text, $sub)
2687              
2688             Define a method for a dyadic operator.
2689              
2690             Parameter Description
2691             1 $parse Sub quarks
2692             2 $text The name of the operator as a utf8 string
2693             3 $sub Associated subroutine definition
2694              
2695             =head2 assign($parse, $text, $sub)
2696              
2697             Define a method for an assign operator.
2698              
2699             Parameter Description
2700             1 $parse Sub quarks
2701             2 $text The name of the operator as a utf8 string
2702             3 $sub Associated subroutine definition
2703              
2704             =head2 prefix($parse, $text, $sub)
2705              
2706             Define a method for a prefix operator.
2707              
2708             Parameter Description
2709             1 $parse Sub quarks
2710             2 $text The name of the operator as a utf8 string
2711             3 $sub Associated subroutine definition
2712              
2713             =head2 suffix($parse, $text, $sub)
2714              
2715             Define a method for a suffix operator.
2716              
2717             Parameter Description
2718             1 $parse Sub quarks
2719             2 $text The name of the operator as a utf8 string
2720             3 $sub Associated subroutine definition
2721              
2722             =head2 ascii($parse, $sub)
2723              
2724             Define a method for ascii text.
2725              
2726             Parameter Description
2727             1 $parse Sub quarks
2728             2 $sub Associated subroutine definition
2729              
2730             =head2 semiColon($parse, $sub)
2731              
2732             Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
2733              
2734             Parameter Description
2735             1 $parse Sub quarks
2736             2 $sub Associated subroutine definition
2737              
2738             =head2 variable($parse, $sub)
2739              
2740             Define a method for a variable.
2741              
2742             Parameter Description
2743             1 $parse Sub quarks
2744             2 $sub Associated subroutine definition
2745              
2746             =head2 bracket($parse, $open, $sub)
2747              
2748             Define a method for a bracket operator.
2749              
2750             Parameter Description
2751             1 $parse Sub quarks
2752             2 $open Opening parenthesis
2753             3 $sub Associated subroutine
2754              
2755             =head1 Alphabets
2756              
2757             Translate between alphabets.
2758              
2759             =head2 asciiToAssignLatin($in)
2760              
2761             Translate ascii to the corresponding letters in the assign latin alphabet.
2762              
2763             Parameter Description
2764             1 $in A string of ascii
2765              
2766             =head2 asciiToAssignGreek($in)
2767              
2768             Translate ascii to the corresponding letters in the assign greek alphabet.
2769              
2770             Parameter Description
2771             1 $in A string of ascii
2772              
2773             =head2 asciiToDyadLatin($in)
2774              
2775             Translate ascii to the corresponding letters in the dyad latin alphabet.
2776              
2777             Parameter Description
2778             1 $in A string of ascii
2779              
2780             =head2 asciiToDyadGreek($in)
2781              
2782             Translate ascii to the corresponding letters in the dyad greek alphabet.
2783              
2784             Parameter Description
2785             1 $in A string of ascii
2786              
2787             =head2 asciiToPrefixLatin($in)
2788              
2789             Translate ascii to the corresponding letters in the prefix latin alphabet.
2790              
2791             Parameter Description
2792             1 $in A string of ascii
2793              
2794             =head2 asciiToPrefixGreek($in)
2795              
2796             Translate ascii to the corresponding letters in the prefix greek alphabet.
2797              
2798             Parameter Description
2799             1 $in A string of ascii
2800              
2801             =head2 asciiToSuffixLatin($in)
2802              
2803             Translate ascii to the corresponding letters in the suffix latin alphabet.
2804              
2805             Parameter Description
2806             1 $in A string of ascii
2807              
2808             =head2 asciiToSuffixGreek($in)
2809              
2810             Translate ascii to the corresponding letters in the suffix greek alphabet.
2811              
2812             Parameter Description
2813             1 $in A string of ascii
2814              
2815             =head2 asciiToVariableLatin($in)
2816              
2817             Translate ascii to the corresponding letters in the variable latin alphabet.
2818              
2819             Parameter Description
2820             1 $in A string of ascii
2821              
2822             =head2 asciiToVariableGreek($in)
2823              
2824             Translate ascii to the corresponding letters in the variable greek alphabet.
2825              
2826             Parameter Description
2827             1 $in A string of ascii
2828              
2829             =head2 asciiToEscaped($in)
2830              
2831             Translate ascii to the corresponding letters in the escaped ascii alphabet.
2832              
2833             Parameter Description
2834             1 $in A string of ascii
2835              
2836             =head2 semiColonChar()
2837              
2838             Return the character used to represent a semicolon.
2839              
2840              
2841             =head2 printOperatorSequence($parse)
2842              
2843             Print the operator calling sequence.
2844              
2845             Parameter Description
2846             1 $parse Parse
2847              
2848             =head2 executeOperator($parse)
2849              
2850             Execute the operator calling sequence.
2851              
2852             Parameter Description
2853             1 $parse Parse
2854              
2855              
2856             =head1 Hash Definitions
2857              
2858              
2859              
2860              
2861             =head2 Unisyn::Parse Definition
2862              
2863              
2864             Description of parse
2865              
2866              
2867              
2868              
2869             =head3 Output fields
2870              
2871              
2872             =head4 address8
2873              
2874             Address of source string as utf8
2875              
2876             =head4 arena
2877              
2878             Arena containing tree
2879              
2880             =head4 fails
2881              
2882             Number of failures encountered in this parse
2883              
2884             =head4 operators
2885              
2886             Methods implementing each lexical operator
2887              
2888             =head4 parse
2889              
2890             Offset to the head of the parse tree
2891              
2892             =head4 quarks
2893              
2894             Quarks representing the strings used in this parse
2895              
2896             =head4 size8
2897              
2898             Size of source string as utf8
2899              
2900             =head4 source32
2901              
2902             Source text as utf32
2903              
2904             =head4 sourceLength32
2905              
2906             Length of utf32 string
2907              
2908             =head4 sourceSize32
2909              
2910             Size of utf32 allocation
2911              
2912             =head4 width
2913              
2914             Size of entries in exec chain
2915              
2916              
2917              
2918             =head1 Private Methods
2919              
2920             =head2 getAlpha($register, $address, $index)
2921              
2922             Load the position of a lexical item in its alphabet from the current character.
2923              
2924             Parameter Description
2925             1 $register Register to load
2926             2 $address Address of start of string
2927             3 $index Index into string
2928              
2929             =head2 getLexicalCode($register, $address, $index)
2930              
2931             Load the lexical code of the current character in memory into the specified register.
2932              
2933             Parameter Description
2934             1 $register Register to load
2935             2 $address Address of start of string
2936             3 $index Index into string
2937              
2938             =head2 putLexicalCode($register, $address, $index, $code)
2939              
2940             Put the specified lexical code into the current character in memory.
2941              
2942             Parameter Description
2943             1 $register Register used to load code
2944             2 $address Address of string
2945             3 $index Index into string
2946             4 $code Code to put
2947              
2948             =head2 loadCurrentChar()
2949              
2950             Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
2951              
2952              
2953             =head2 checkStackHas($depth)
2954              
2955             Check that we have at least the specified number of elements on the stack.
2956              
2957             Parameter Description
2958             1 $depth Number of elements required on the stack
2959              
2960             =head2 pushElement()
2961              
2962             Push the current element on to the stack.
2963              
2964              
2965             =head2 pushEmpty()
2966              
2967             Push the empty element on to the stack.
2968              
2969              
2970             =head2 lexicalNameFromLetter($l)
2971              
2972             Lexical name for a lexical item described by its letter.
2973              
2974             Parameter Description
2975             1 $l Letter of the lexical item
2976              
2977             =head2 lexicalNumberFromLetter($l)
2978              
2979             Lexical number for a lexical item described by its letter.
2980              
2981             Parameter Description
2982             1 $l Letter of the lexical item
2983              
2984             =head2 lexicalItemLength($source32, $offset)
2985              
2986             Put the length of a lexical item into variable B<size>.
2987              
2988             Parameter Description
2989             1 $source32 B<address> of utf32 source representation
2990             2 $offset B<offset> to lexical item in utf32
2991              
2992             =head2 new($depth, $description)
2993              
2994             Create a new term in the parse tree rooted on the stack.
2995              
2996             Parameter Description
2997             1 $depth Stack depth to be converted
2998             2 $description Text reason why we are creating a new term
2999              
3000             =head2 error($message)
3001              
3002             Write an error message and stop.
3003              
3004             Parameter Description
3005             1 $message Error message
3006              
3007             =head2 testSet($set, $register)
3008              
3009             Test a set of items, setting the Zero Flag if one matches, else clearing the Zero Flag.
3010              
3011             Parameter Description
3012             1 $set Set of lexical letters
3013             2 $register Register to test
3014              
3015             =head2 checkSet($set)
3016              
3017             Check that one of a set of items is on the top of the stack or complain if it is not.
3018              
3019             Parameter Description
3020             1 $set Set of lexical letters
3021              
3022             =head2 reduce($priority)
3023              
3024             Convert the longest possible expression on top of the stack into a term at the specified priority.
3025              
3026             Parameter Description
3027             1 $priority Priority of the operators to reduce
3028              
3029             =head2 reduceMultiple($priority)
3030              
3031             Reduce existing operators on the stack.
3032              
3033             Parameter Description
3034             1 $priority Priority of the operators to reduce
3035              
3036             =head2 accept_a()
3037              
3038             Assign.
3039              
3040              
3041             =head2 accept_b()
3042              
3043             Open.
3044              
3045              
3046             =head2 accept_B()
3047              
3048             Closing parenthesis.
3049              
3050              
3051             =head2 accept_d()
3052              
3053             Infix but not assign or semi-colon.
3054              
3055              
3056             =head2 accept_p()
3057              
3058             Prefix.
3059              
3060              
3061             =head2 accept_q()
3062              
3063             Post fix.
3064              
3065              
3066             =head2 accept_s()
3067              
3068             Semi colon.
3069              
3070              
3071             =head2 accept_v()
3072              
3073             Variable.
3074              
3075              
3076             =head2 parseExpression()
3077              
3078             Parse the string of classified lexical items addressed by register $start of length $length. The resulting parse tree (if any) is returned in r15.
3079              
3080              
3081             =head2 MatchBrackets(@parameters)
3082              
3083             Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
3084              
3085             Parameter Description
3086             1 @parameters Parameters
3087              
3088             =head2 ClassifyNewLines(@parameters)
3089              
3090             Scan input string looking for opportunities to convert new lines into semi colons.
3091              
3092             Parameter Description
3093             1 @parameters Parameters
3094              
3095             =head2 ClassifyWhiteSpace(@parameters)
3096              
3097             Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
3098              
3099             Parameter Description
3100             1 @parameters Parameters
3101              
3102             =head2 reload($parse, $parameters)
3103              
3104             Reload the variables associated with a parse.
3105              
3106             Parameter Description
3107             1 $parse Parse
3108             2 $parameters Hash of variable parameters
3109              
3110             =head2 parseUtf8($parse, @parameters)
3111              
3112             Parse a unisyn expression encoded as utf8 and return the parse tree.
3113              
3114             Parameter Description
3115             1 $parse Parse
3116             2 @parameters Parameters
3117              
3118             =head2 printExecChain($parse)
3119              
3120             Print the execution chain for a parse.
3121              
3122             Parameter Description
3123             1 $parse Parse tree
3124              
3125             =head2 printLexicalItem($parse, $source32, $offset, $size)
3126              
3127             Print the utf8 string corresponding to a lexical item at a variable offset.
3128              
3129             Parameter Description
3130             1 $parse Parse tree
3131             2 $source32 B<address> of utf32 source representation
3132             3 $offset B<offset> to lexical item in utf32
3133             4 $size B<size> in utf32 chars of item
3134              
3135             =head2 showAlphabet($alphabet)
3136              
3137             Show an alphabet.
3138              
3139             Parameter Description
3140             1 $alphabet Alphabet name
3141              
3142             =head2 T($key, $expected, %options)
3143              
3144             Parse some text and dump the results.
3145              
3146             Parameter Description
3147             1 $key Key of text to be parsed
3148             2 $expected Expected result
3149             3 %options Options
3150              
3151             =head2 C($key, $expected, %options)
3152              
3153             Parse some text and print the results.
3154              
3155             Parameter Description
3156             1 $key Key of text to be parsed
3157             2 $expected Expected result
3158             3 %options Options
3159              
3160              
3161             =head1 Index
3162              
3163              
3164             1 L<accept_a|/accept_a> - Assign.
3165              
3166             2 L<accept_B|/accept_B> - Closing parenthesis.
3167              
3168             3 L<accept_b|/accept_b> - Open.
3169              
3170             4 L<accept_d|/accept_d> - Infix but not assign or semi-colon.
3171              
3172             5 L<accept_p|/accept_p> - Prefix.
3173              
3174             6 L<accept_q|/accept_q> - Post fix.
3175              
3176             7 L<accept_s|/accept_s> - Semi colon.
3177              
3178             8 L<accept_v|/accept_v> - Variable.
3179              
3180             9 L<ascii|/ascii> - Define a method for ascii text.
3181              
3182             10 L<asciiToAssignGreek|/asciiToAssignGreek> - Translate ascii to the corresponding letters in the assign greek alphabet.
3183              
3184             11 L<asciiToAssignLatin|/asciiToAssignLatin> - Translate ascii to the corresponding letters in the assign latin alphabet.
3185              
3186             12 L<asciiToDyadGreek|/asciiToDyadGreek> - Translate ascii to the corresponding letters in the dyad greek alphabet.
3187              
3188             13 L<asciiToDyadLatin|/asciiToDyadLatin> - Translate ascii to the corresponding letters in the dyad latin alphabet.
3189              
3190             14 L<asciiToEscaped|/asciiToEscaped> - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3191              
3192             15 L<asciiToPrefixGreek|/asciiToPrefixGreek> - Translate ascii to the corresponding letters in the prefix greek alphabet.
3193              
3194             16 L<asciiToPrefixLatin|/asciiToPrefixLatin> - Translate ascii to the corresponding letters in the prefix latin alphabet.
3195              
3196             17 L<asciiToSuffixGreek|/asciiToSuffixGreek> - Translate ascii to the corresponding letters in the suffix greek alphabet.
3197              
3198             18 L<asciiToSuffixLatin|/asciiToSuffixLatin> - Translate ascii to the corresponding letters in the suffix latin alphabet.
3199              
3200             19 L<asciiToVariableGreek|/asciiToVariableGreek> - Translate ascii to the corresponding letters in the variable greek alphabet.
3201              
3202             20 L<asciiToVariableLatin|/asciiToVariableLatin> - Translate ascii to the corresponding letters in the variable latin alphabet.
3203              
3204             21 L<assign|/assign> - Define a method for an assign operator.
3205              
3206             22 L<bracket|/bracket> - Define a method for a bracket operator.
3207              
3208             23 L<C|/C> - Parse some text and print the results.
3209              
3210             24 L<checkSet|/checkSet> - Check that one of a set of items is on the top of the stack or complain if it is not.
3211              
3212             25 L<checkStackHas|/checkStackHas> - Check that we have at least the specified number of elements on the stack.
3213              
3214             26 L<ClassifyNewLines|/ClassifyNewLines> - Scan input string looking for opportunities to convert new lines into semi colons.
3215              
3216             27 L<ClassifyWhiteSpace|/ClassifyWhiteSpace> - Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
3217              
3218             28 L<create|/create> - Create a new unisyn parse from a utf8 string.
3219              
3220             29 L<dumpParseTree|/dumpParseTree> - Dump the parse tree.
3221              
3222             30 L<dyad|/dyad> - Define a method for a dyadic operator.
3223              
3224             31 L<error|/error> - Write an error message and stop.
3225              
3226             32 L<executeOperator|/executeOperator> - Execute the operator calling sequence.
3227              
3228             33 L - Load the position of a lexical item in its alphabet from the current character.
3229              
3230             34 L - Load the lexical code of the current character in memory into the specified register.
3231              
3232             35 L - Put the length of a lexical item into variable B.
3233              
3234             36 L - Lexical name for a lexical item described by its letter.
3235              
3236             37 L - Lexical number for a lexical item described by its letter.
3237              
3238             38 L - Map a lexical item to a processing subroutine.
3239              
3240             39 L - Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
3241              
3242             40 L - Traverse the parse tree in post order to create an execution chain.
3243              
3244             41 L - Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket.
3245              
3246             42 L - Create a new term in the parse tree rooted on the stack.
3247              
3248             43 L - Parse the string of classified lexical items addressed by register $start of length $length.
3249              
3250             44 L - Parse a unisyn expression encoded as utf8 and return the parse tree.
3251              
3252             45 L - Define a method for a prefix operator.
3253              
3254             46 L - Print a parse tree.
3255              
3256             47 L - Print the execute chain for a parse
3257              
3258             48 L - Print the utf8 string corresponding to a lexical item at a variable offset.
3259              
3260             49 L - Print the operator calling sequence.
3261              
3262             50 L - Push the current element on to the stack.
3263              
3264             51 L - Push the empty element on to the stack.
3265              
3266             52 L - Put the specified lexical code into the current character in memory.
3267              
3268             53 L - Convert the longest possible expression on top of the stack into a term at the specified priority.
3269              
3270             54 L - Reduce existing operators on the stack.
3271              
3272             55 L - Reload the variables associated with a parse.
3273              
3274             56 L - Define a method for the semicolon operator which comes in two forms: the explicit semicolon and a new line semicolon.
3275              
3276             57 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3277              
3278             58 L - Show an alphabet.
3279              
3280             59 L - Define a method for a suffix operator.
3281              
3282             60 L - Parse some text and dump the results.
3283              
3284             61 L - Test a set of items, setting the Zero Flag if one matches, else clearing the Zero Flag.
3285              
3286             62 L - Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
3287              
3288             63 L - Define a method for a variable.
3289              
3290             =head1 Installation
3291              
3292             This module is written in 100% Pure Perl and, thus, it is easy to read,
3293             comprehend, use, modify and install via B<cpan>:
3294              
3295             sudo cpan install Unisyn::Parse
3296              
3297             =head1 Author
3298              
3299             L
3300              
3301             L
3302              
3303             =head1 Copyright
3304              
3305             Copyright (c) 2016-2021 Philip R Brenan.
3306              
3307             This module is free software. It may be used, redistributed and/or modified
3308             under the same terms as Perl itself.
3309              
3310             =cut
3311              
3312              
3313              
3314             # Tests and documentation
3315              
3316             sub test
3317 1     1 0 10 {my $p = __PACKAGE__;
3318 1         11 binmode($_, ":utf8") for *STDOUT, *STDERR;
3319 1 50       77 return if eval "eof(${p}::DATA)";
3320 1         80 my $s = eval "join('', <${p}::DATA>)";
3321 1 50       28 $@ and die $@;
3322 1 0   1 1 7 eval $s;
  1     1 1 2  
  1     0 1 10  
  1     0 1 772  
  1     0   68212  
  1     0   10  
  1     0   154  
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
3323 0 0         $@ and die $@;
3324 0           1
3325             }
3326              
3327             test unless caller;
3328              
3329             1;
3330             # podDocumentation
3331             __DATA__