File Coverage

blib/lib/Unisyn/Parse.pm
Criterion Covered Total %
statement 49 1070 4.5
branch 2 100 2.0
condition 0 9 0.0
subroutine 15 98 15.3
pod 59 61 96.7
total 125 1338 9.3


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/NasmX86/lib/ -I/home/phil/perl/cpan/AsmC/lib/
2             #-------------------------------------------------------------------------------
3             # Parse a Unisyn expression.
4             # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             # Finished in 13.14s, bytes: 2,655,008, execs: 465,858
8             # Can we remove more Pushr by doing one big save in parseutf8 ?
9             package Unisyn::Parse;
10             our $VERSION = "20210927";
11 1     1   26023 use warnings FATAL => qw(all);
  1         11  
  1         53  
12 1     1   5 use strict;
  1         1  
  1         34  
13 1     1   6 use Carp qw(confess cluck);
  1         1  
  1         91  
14 1     1   492 use Data::Dump qw(dump);
  1         6597  
  1         88  
15 1     1   3494 use Data::Table::Text qw(:all !parse);
  1         124239  
  1         1705  
16 1     1   5691 use Nasm::X86 qw(:all);
  1         140555  
  1         2740  
17 1     1   32 use feature qw(say current_sub);
  1         2  
  1         97  
18 1     1   7 use utf8;
  1         1  
  1         8  
19              
20             makeDieConfess;
21              
22             my $develop = -e q(/home/phil/); # Developing
23             our $Parse; # One of the advantages of creating a parse tree is that we perform parses one at a time, making it safe to globalize this variable. The alternative is to pass this variable between all the parsing calls, which would obscure their workings greatly.
24             our $ParseUtf8SubDef; # The definition of the subroutine that does the parsing so that we can reuse its parameters when we call new.
25             our $debug = 0; # Print evolution of stack if true.
26              
27             #D1 Create # Create a Unisyn parse of a utf8 string.
28              
29             sub create($%) # Create a new unisyn parse from a utf8 string.
30 0     0 1 0 {my ($address, %options) = @_; # Address of a zero terminated utf8 source string to parse as a variable, parse options.
31 0 0       0 @_ >= 1 or confess "One or more parameters";
32              
33 0         0 my $a = CreateArena; # Arena to hold parse tree - every parse tree gets its own arena so that we can free parses separately
34 0         0 my $size = StringLength string => $address; # Length of input utf8
35              
36 0         0 my $p = $Parse = genHash(__PACKAGE__, # Description of parse
37             arena => $a, # Arena containing tree
38             size8 => $size, # Size of source string as utf8
39             address8 => $address, # Address of source string as utf8
40             source32 => V(source32), # Source text as utf32
41             sourceSize32 => V(sourceSize32), # Size of utf32 allocation
42             sourceLength32 => V(sourceLength32), # Length of utf32 string
43             parse => V('parse'), # Offset to the head of the parse tree
44             fails => V('fail'), # Number of failures encountered in this parse
45             quarks => $a->CreateQuarks, # Quarks representing the strings used in this parse
46             operators => undef, # Methods implementing each lexical operator
47             );
48              
49 0 0       0 if (my $o = $options{operators}) # Operator methods for lexical items
50 0         0 {$p->operators = $a->CreateSubQuarks; # Create quark set to translate operator names to offsets
51 0         0 $o->($p);
52             }
53              
54 0         0 $p->parseUtf8; # Parse utf8 source string
55              
56 0         0 $p
57             }
58              
59             #D1 Parse # Parse Unisyn expressions
60              
61             our $Lex = &lexicalData; # Lexical table definitions
62              
63             our $ses = RegisterSize rax; # Size of an element on the stack
64             our ($w1, $w2, $w3) = (r8, r9, r10); # Work registers
65             our $prevChar = r11; # The previous character parsed
66             our $index = r12; # Index of current element
67             our $element = r13; # Contains the item being parsed
68             our $start = r14; # Start of the parse string
69             our $size = r15; # Length of the input string
70             our $parseStackBase = rsi; # The base of the parsing stack in the stack
71             #ur $arenaReg = rax; # The arena in which we are building the parse tree
72             our $indexScale = 4; # The size of a utf32 character
73             our $lexCodeOffset = 3; # The offset in a classified character to the lexical code.
74             our $bitsPerByte = 8; # The number of bits in a byte
75              
76             our $Ascii = $$Lex{lexicals}{Ascii} {number}; # Ascii
77             our $assign = $$Lex{lexicals}{assign} {number}; # Assign
78             our $dyad = $$Lex{lexicals}{dyad} {number}; # Dyad
79             our $CloseBracket = $$Lex{lexicals}{CloseBracket} {number}; # Close bracket
80             our $empty = $$Lex{lexicals}{empty} {number}; # Empty element
81             our $NewLineSemiColon = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semicolon
82             our $OpenBracket = $$Lex{lexicals}{OpenBracket} {number}; # Open bracket
83             our $prefix = $$Lex{lexicals}{prefix} {number}; # Prefix operator
84             our $semiColon = $$Lex{lexicals}{semiColon} {number}; # Semicolon
85             our $suffix = $$Lex{lexicals}{suffix} {number}; # Suffix
86             our $term = $$Lex{lexicals}{term} {number}; # Term
87             our $variable = $$Lex{lexicals}{variable} {number}; # Variable
88             our $WhiteSpace = $$Lex{lexicals}{WhiteSpace} {number}; # White space
89             our $firstSet = $$Lex{structure}{first}; # First symbols allowed
90             our $lastSet = $$Lex{structure}{last}; # Last symbols allowed
91             our $bracketsBase = $$Lex{bracketsBase}; # Base lexical item for brackets
92              
93             our $asciiNewLine = ord("\n"); # New line in ascii
94             our $asciiSpace = ord(' '); # Space in ascii
95              
96             our $lexItemType = 0; # Field number of lexical item type in the description of a lexical item
97             our $lexItemOffset = 1; # Field number of the offset in the utf32 source of the lexical item in the description of a lexical item or - if this is a term - the offset of the invariant first block of the sub tree
98             our $lexItemLength = 2; # Field number of the length of the lexical item in the utf32 source in the description of a lexical item
99             our $lexItemQuark = 3; # Quark containing the text of this lexical item.
100             our $lexItemWidth = 4; # The number of fields used to describe a lexical item in the parse tree
101              
102             our $opType = 0; # Operator type field - currently always a term
103             our $opCount = 1; # Number of operands for this operator
104             our $opSub = 2; # Offset of sub associated with this lexical item
105              
106             sub getAlpha($$$) #P Load the position of a lexical item in its alphabet from the current character.
107 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
108 0         0 Mov $register, "[$address+$indexScale*$index]"; # Load position of the character in its alphabet
109             }
110              
111             sub getLexicalCode($$$) #P Load the lexical code of the current character in memory into the specified register.
112 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
113 0         0 Mov $register, "[$address+$indexScale*$index+$lexCodeOffset]"; # Load lexical code
114             }
115              
116             sub putLexicalCode($$$$) #P Put the specified lexical code into the current character in memory.
117 0     0 1 0 {my ($register, $address, $index, $code) = @_; # Register used to load code, address of string, index into string, code to put
118 0         0 Mov $register, $code;
119 0         0 Mov "[$address+$indexScale*$index+$lexCodeOffset]", $register; # Save lexical code
120             }
121              
122             sub loadCurrentChar() #P Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
123 0     0 1 0 {my $r = $element."b"; # Classification byte
124              
125 0         0 Mov $element, $index; # Load index of character as upper dword
126 0         0 Shl $element, $indexScale * $bitsPerByte; # Save the index of the character in the upper half of the register so that we know where the character came from.
127 0         0 getLexicalCode $r, $start, $index; # Load lexical classification as lowest byte
128              
129 0         0 Cmp $r, $bracketsBase; # Brackets, due to their frequency, start after 0x10 with open even and close odd
130             IfGe # Brackets
131             Then
132 0     0   0 {And $r, 1 # Bracket: 0 - open, 1 - close
133             },
134             Else
135 0     0   0 {Cmp $r, $Ascii; # Ascii is a type of variable
136             IfEq
137             Then
138 0         0 {Mov $r, $variable;
139             },
140             Else
141 0         0 {Cmp $r, $NewLineSemiColon; # New line semicolon is a type of semi colon
142             IfEq
143             Then
144 0         0 {Mov $r, $semiColon;
145 0         0 };
146 0         0 };
147 0         0 };
148             }
149              
150             sub checkStackHas($) #P Check that we have at least the specified number of elements on the stack.
151 0     0 1 0 {my ($depth) = @_; # Number of elements required on the stack
152 0         0 Mov $w1, $parseStackBase;
153 0         0 Sub $w1, rsp;
154 0         0 Cmp $w1, $ses * $depth;
155             }
156              
157             sub pushElement() #P Push the current element on to the stack.
158 0     0 1 0 {Push $element;
159 0 0       0 if ($debug)
160 0         0 {PrintErrStringNL "Push Element:";
161 0         0 PrintErrRegisterInHex $element;
162             }
163             }
164              
165             sub pushEmpty() #P Push the empty element on to the stack.
166 0     0 1 0 {Mov $w1, $index;
167 0         0 Shl $w1, $indexScale * $bitsPerByte;
168 0         0 Or $w1, $empty;
169 0         0 Push $w1;
170 0 0       0 if ($debug)
171 0         0 {PrintErrStringNL "Push Empty";
172             }
173             }
174              
175             sub lexicalNameFromLetter($) #P Lexical name for a lexical item described by its letter.
176 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
177 0         0 my %l = $Lex->{treeTermLexicals}->%*;
178 0         0 my $n = $l{$l};
179 0 0       0 confess "No such lexical: $l" unless $n;
180             $n->{short}
181 0         0 }
182              
183             sub lexicalNumberFromLetter($) #P Lexical number for a lexical item described by its letter.
184 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
185 0         0 my $n = lexicalNameFromLetter $l;
186 0         0 my $N = $Lex->{lexicals}{$n}{number};
187 0 0       0 confess "No such lexical named: $n" unless defined $N;
188 0         0 $N
189             }
190              
191             sub lexicalItemLength($$) #P Put the length of a lexical item into variable size.
192 0     0 1 0 {my ($source32, $offset) = @_; # Address of utf32 source representation, offset to lexical item in utf32
193              
194             my $s = Subroutine
195 0     0   0 {my ($p, $s) = @_; # Parameters
196             # PushR r14, r15; # We do not need to save the zmm and mask registers because they are only used as temporary work registers and they have been saved in L
197              
198 0         0 $$p{source32}->setReg(r14);
199 0         0 $$p{offset} ->setReg(r15);
200 0         0 Vmovdqu8 zmm0, "[r14+4*r15]"; # Load source to examine
201 0         0 Pextrw r15, xmm0, 1; # Extract lexical type of first element
202              
203             OrBlock # The size of a bracket or a semi colon is always 1
204 0         0 {my ($pass, $end, $start) = @_;
205 0         0 Cmp r15, $OpenBracket;
206 0         0 Je $pass;
207 0         0 Cmp r15, $CloseBracket;
208 0         0 Je $pass;
209 0         0 Cmp r15, $semiColon;
210 0         0 Je $pass;
211              
212 0         0 Vpbroadcastw zmm1, r15w; # Broadcast lexical type
213 0         0 Vpcmpeqw k0, zmm0, zmm1; # Check extent of first lexical item up to 16
214 0         0 Mov r15, 0x55555555; # Set odd positions to one where we know the match will fail
215 0         0 Kmovq k1, r15;
216 0         0 Korq k2, k0, k1; # Fill in odd positions
217              
218 0         0 Kmovq r15, k2;
219 0         0 Not r15; # Swap zeroes and ones
220 0         0 Tzcnt r15, r15; # Trailing zero count is a factor two too big
221 0         0 Shr r15, 1; # Normalized count of number of characters in lexical item
222 0         0 $$p{size}->getReg(r15); # Save size in supplied variable
223             }
224             Pass # Show unitary length
225 0         0 {my ($end, $pass, $start) = @_;
226 0         0 $$p{size}->getConst(1); # Save size in supplied variable
227 0         0 };
228              
229             # PopR;
230 0         0 } [qw(offset source32 size)],
231             name => q(Unisyn::Parse::lexicalItemLength);
232              
233 0         0 $s->call(offset => $offset, source32 => $source32, my $size = V(size));
234              
235 0         0 $size
236             }
237              
238             sub new($$) #P Create a new term in the parse tree rooted on the stack.
239 0     0 1 0 {my ($depth, $description) = @_; # Stack depth to be converted, text reason why we are creating a new term
240              
241 0         0 my $wr = RegisterSize rax; # Width of general purpose register
242              
243             my $s = Subroutine
244 0     0   0 {my ($locals) = @_; # Parameters
245 0         0 my $a = DescribeArena $$locals{bs}; # Address arena
246              
247             my $quarks = $Parse->quarks->reload(arena => $$locals{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
248             array => $$locals{numbersToStringsFirst},
249 0         0 tree => $$locals{stringsToNumbersFirst});
250              
251             my $operators = $Parse->operators ? $Parse->operators->reload # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
252             (arena => $$locals{bs},
253             array => $$locals{opNumbersToStringsFirst},
254 0 0       0 tree => $$locals{opStringsToNumbersFirst}) : undef;
255              
256 0         0 my $t = $a->CreateTree; # Create a tree in the arena to hold the details of the lexical elements on the stack
257 0         0 my $o = V(offset); # Offset into source for lexical item
258 0         0 $t->insert(V(key, $opType), K(data, $term)); # Create a term - we only have terms at the moment in the parse tree - but that might change in the future
259 0         0 $t->insert(V(key, $opCount), K(data, $depth)); # The number of elements in the term which is the number of operands for the operator
260              
261 0         0 my $liOnStack = $w1; # The lexical item as it appears on the stack
262 0         0 my $liType = $w2; # The lexical item type
263 0         0 my $liOffset = $w3; # The lexical item offset in the source
264              
265 0         0 PushR zmm0; # Put the simulated stack on the stack
266              
267 0         0 for my $i(1..$depth) # Each term
268 0         0 {my $j = $depth + 1 - $i;
269 0         0 my $k = ($i - 1) * $wr; # Position in simulated stack
270 0         0 Mov $liOnStack, "[rsp+$k]"; # Copy term out of simulated stack
271 0 0       0 PrintErrRegisterInHex $liOnStack if $debug;
272              
273 0         0 Mov $liOffset, $liOnStack; # Offset of either the text in the source or the offset of the first block of the tree describing a term
274 0         0 Shr $liOffset, 32; # Offset in source: either the actual text or the offset of the first block of the tree containing a term shifted over to look as if it were an offset in the source
275 0         0 $o->getReg($liOffset); # Offset of lexical item in source or offset of first block in tree describing a term
276              
277 0         0 ClearRegisters $liType;
278 0         0 Mov $liType."b", $liOnStack."b"; # The lexical item type in the lowest byte, the rest clear.
279              
280 0         0 Cmp $liType, $term; # Check whether the lexical item on the stack is a term
281             IfEq # Insert a sub tree if we are inserting a term
282             Then
283 0         0 {$t->insertTree(K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset of first block in the tree representing the term
284             },
285             Else # Insert the offset in the utf32 source if we are not on a term
286 0         0 {$t->insert (K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset in source of non term
287 0         0 };
288              
289 0         0 Cmp $liType, $variable; # Check whether the lexical item is a variable which can also represent ascii
290             IfEq # Reload the actual lexical type from the source if the item is a variable because it might be ascii
291             Then
292 0         0 {Mov $liType."b", "[$start+4*$liOffset+3]"; # Load lexical type from source
293 0         0 };
294              
295 0         0 Cmp $liType, $term; # Length of lexical item that is not a term
296             IfNe
297             Then # Not a term
298 0         0 {my $size = lexicalItemLength(V(address, $start), $o); # Get the size of the lexical item at the offset indicated on the stack
299 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemLength), $size); # Save size of lexical item in parse tree
300              
301 0         0 my $s = CreateShortString(1); # Short string to hold text of lexical item so we can load it into a quark
302 0         0 $s->clear; # Perhaps not strictly necessary but easier to debug
303 0         0 PushR r15; # Probably not needed as saved in L
304 0 0 0     0 r15 ne $start && r15 ne $liOffset or confess "r15 in use";
305 0         0 Lea r15, "[$start+4*$liOffset]"; # Start address of lexical item
306 0         0 my $startAddress = V(address, r15); # Save start address of lexical item
307 0         0 PopR;
308              
309 0         0 Cmp $liType, $OpenBracket; # Is it a bracket ?
310             IfEq
311             Then
312 0         0 {ClearRegisters $liType; # Compute lexical type of bracket by adding bracket number to the start of the bracket range
313 0         0 Mov $liType."b", "[$start+4*$liOffset+3]"; # Load bracket number
314 0         0 Shl $liType, 16; # Shift bracket base into position
315 0         0 Add $liType, 2; # Set length of short string as two = (lexical type, bracket number)
316 0         0 Pinsrd "xmm1", $liType."d", 0; # Load short string
317 0         0 Shr $liType, 16; # Move lexical type back into position for insertion into the parse tree
318             },
319             Else
320 0         0 {$s->loadDwordBytes(0, $startAddress, $size, 1); # Load text of lexical item into short string leaving space for lexical type
321 0         0 Pinsrb "xmm1", $liType."b", 1; # Set lexical type as the first byte of the short string
322 0         0 };
323              
324 0         0 my $q = $quarks->quarkFromShortString($s); # Find the quark matching the lexical item if there is such a quark
325 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemQuark), $q); # Save quark number of lexical item in parse tree
326 0 0       0 if ($operators) # The parse has operator definitions
327 0 0       0 {if ($j == 1) # The operator quark is always first
328             {OrBlock # Like an operator or like a variable?
329 0         0 {my ($pass, $end, $start) = @_;
330 0         0 Cmp $liType, $variable;
331 0         0 Je $pass; # Process a variable
332 0         0 Cmp $liType, $Ascii;
333 0         0 Je $pass; # Process ascii constant
334 0         0 Cmp $liType, $semiColon;
335 0         0 Je $pass; # Process Semicolon
336 0         0 Cmp $liType, $NewLineSemiColon;
337 0         0 Je $pass; # Process new line semicolon
338             # Process non variable, i.e. operators specifically
339 0         0 my $N = $operators->subFromQuark($quarks, $q); # Look up the subroutine associated with this operator
340             If $N >= 0, # Found a matching operator subroutine
341             Then
342 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
343 0         0 };
344             }
345             Pass # Process variables in general or items based on variables using a short string of length 1 being the lexical type of the item in question
346 0         0 {Shl $liType, 8; # Move lexical type into second byte
347 0         0 Inc $liType; # Show length
348 0         0 Pinsrq "xmm1", $liType, 0; # Load short string
349 0         0 my $q = $operators->subQuarks->locateQuarkFromShortString($s); # Offset for variable processing sub
350 0         0 $operators->subQuarks->numbersToStrings->get(index=>$q, # Load subroutine offset
351             my $N = V(element));
352              
353 0         0 Shr $liType, 8; # Restore lexical type
354             If $N >= 0, # Found a matching operator subroutine
355             Then
356 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
357 0         0 };
358 0         0 };
359             }
360             }
361 0         0 };
362              
363 0         0 $t->insert (V(key, $lexItemWidth * $j + $lexItemType), # Save lexical type in parse tree
364             V(data)->getReg($liType));
365             }
366             # Push new term onto the stack in place of the items popped off
367 0         0 $t->first->setReg($liOffset); # Offset of new term tree
368 0         0 Shl $liOffset, 32; # Push offset to term tree into the upper dword to make it look like a source offset
369 0         0 Or $liOffset."b", $term; # Mark as a term tree
370 0         0 $$locals{new}->getReg($liOffset); # New term comprised of a tree of old terms
371 0         0 PopR; # Restore stack to its position at the start
372             }
373 0         0 [qw(new)], with => $ParseUtf8SubDef,
374             # [qw(bs new
375             # numbersToStringsFirst stringsToNumbersFirst
376             # opNumbersToStringsFirst opStringsToNumbersFirst
377             # )],
378             name=>"Unisyn::Parse::new_$depth";
379              
380 0 0       0 PrintErrStringNL "New: $description" if $debug;
381              
382 0 0       0 if ($depth == 1) {Mov $w1, 1} # Copy the top of the real stack which holds the parse state to zmm0 so that we can adjust the stack to call L
  0 0       0  
383 0         0 elsif ($depth == 2) {Mov $w1, 3}
384 0         0 else {Mov $w1, 7}
385 0         0 Kmovq k1, $w1; # B is saved in L
386 0         0 Vmovdqu64 "zmm0{k1}", "[rsp]"; # Copy top lexical items on stack
387              
388             # $s->call(bs => $Parse->arena->bs, my $new = V('new'),
389             # numbersToStringsFirst => $Parse->quarks->numbersToStrings->first,
390             # stringsToNumbersFirst => $Parse->quarks->stringsToNumbers->first,
391             # opNumbersToStringsFirst => $Parse->operators ? $Parse->operators->subQuarks->numbersToStrings->first : 0,
392             # opStringsToNumbersFirst => $Parse->operators ? $Parse->operators->subQuarks->stringsToNumbers->first : 0,
393             # );
394              
395 0         0 $s->call(my $new = V('new'));
396              
397 0         0 $new->setReg($w1); # Save offset of new term in a work register
398 0         0 Add rsp, $depth * $wr; # Remove input terms from stack
399 0         0 Push $w1; # Save new term on stack
400             }
401              
402             sub error($) #P Write an error message and stop.
403 0     0 1 0 {my ($message) = @_; # Error message
404 0         0 PrintOutStringNL "Error: $message";
405 0         0 PrintOutString "Element: ";
406 0         0 PrintOutRegisterInHex $element;
407 0         0 PrintOutString "Index : ";
408 0         0 PrintOutRegisterInHex $index;
409 0         0 Exit(0);
410             }
411              
412             sub testSet($$) #P Test a set of items, setting the Zero Flag if one matches else clearing the Zero flag.
413 0     0 1 0 {my ($set, $register) = @_; # Set of lexical letters, Register to test
414 0         0 my @n = map {sprintf("0x%x", lexicalNumberFromLetter $_)} split //, $set; # Each lexical item by number from letter
  0         0  
415 0         0 my $end = Label;
416 0         0 for my $n(@n)
417 0         0 {Cmp $register."b", $n;
418 0         0 Je $end
419             }
420 0         0 ClearZF;
421 0         0 SetLabel $end;
422             }
423              
424             sub checkSet($) #P Check that one of a set of items is on the top of the stack or complain if it is not.
425 0     0 1 0 {my ($set) = @_; # Set of lexical letters
426 0         0 my @n = map {lexicalNumberFromLetter $_} split //, $set;
  0         0  
427 0         0 my $end = Label;
428              
429 0         0 for my $n(@n)
430 0         0 {Cmp "byte[rsp]", $n;
431 0         0 Je $end
432             }
433 0         0 error("Expected one of: '$set' on the stack");
434 0         0 ClearZF;
435 0         0 SetLabel $end;
436             }
437              
438             sub reduce($) #P Convert the longest possible expression on top of the stack into a term at the specified priority.
439 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
440 0         0 $priority =~ m(\A(1|3)\Z); # Level: 1 - all operators, 3 - just infix priority 3 operators (the result of this match is not checked)
441 0         0 my ($success, $end) = map {Label} 1..2; # Exit points
  0         0  
442              
443 0         0 checkStackHas 3; # At least three elements on the stack
444             IfGe
445             Then
446 0     0   0 {my ($l, $d, $r) = ($w1, $w2, $w3);
447 0         0 Mov $l, "[rsp+".(2*$ses)."]"; # Top 3 elements on the stack
448 0         0 Mov $d, "[rsp+".(1*$ses)."]";
449 0         0 Mov $r, "[rsp+".(0*$ses)."]";
450              
451 0 0       0 if ($debug)
452 0         0 {PrintErrStringNL "Reduce 3:";
453 0         0 PrintErrRegisterInHex $l, $d, $r;
454             }
455              
456 0         0 testSet("t", $l); # Parse out infix operator expression
457             IfEq
458             Then
459 0         0 {testSet("t", $r);
460             IfEq
461             Then
462 0 0       0 {testSet($priority == 1 ? "ads" : 'd', $d); # Reduce all operators or just reduce infix priority 3 operators
463             IfEq
464             Then
465 0         0 {Add rsp, 3 * $ses; # Reorder into polish notation
466 0         0 Push $_ for $d, $l, $r;
467 0         0 new(3, "Term infix term");
468 0         0 Jmp $success;
469 0         0 };
470 0         0 };
471 0         0 };
472              
473 0         0 testSet("b", $l); # Parse parenthesized term
474             IfEq
475             Then
476 0         0 {testSet("B", $r);
477             IfEq
478             Then
479 0         0 {testSet("t", $d);
480             IfEq
481             Then
482 0         0 {Add rsp, $ses;
483 0         0 new(1, "Bracketed term");
484 0         0 new(2, "Brackets for term");
485 0 0       0 PrintErrStringNL "Reduce by ( term )" if $debug;
486 0         0 Jmp $success;
487 0         0 };
488 0         0 };
489 0         0 };
490 0         0 };
491              
492 0         0 checkStackHas 2; # At least two elements on the stack
493             IfGe # Convert an empty pair of parentheses to an empty term
494             Then
495 0     0   0 {my ($l, $r) = ($w1, $w2);
496              
497 0 0       0 if ($debug)
498 0         0 {PrintErrStringNL "Reduce 2:";
499 0         0 PrintErrRegisterInHex $l, $r;
500             }
501              
502             # KeepFree $l, $r; # Why ?
503 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # Top 2 elements on the stack
504 0         0 Mov $r, "[rsp+".(0*$ses)."]";
505 0         0 testSet("b", $l); # Empty pair of parentheses
506             IfEq
507             Then
508 0         0 {testSet("B", $r);
509             IfEq
510             Then
511 0         0 {Add rsp, 2 * $ses; # Pop expression
512 0         0 Push $l; # Bracket as operator
513 0         0 new(1, "Empty brackets");
514 0         0 Jmp $success;
515 0         0 };
516 0         0 };
517 0         0 testSet("s", $l); # Semi-colon, close implies remove unneeded semi
518             IfEq
519             Then
520 0         0 {testSet("B", $r);
521             IfEq
522             Then
523 0         0 {Add rsp, 2 * $ses; # Pop expression
524 0         0 Push $r;
525 0 0       0 PrintErrStringNL "Reduce by ;)" if $debug;
526 0         0 Jmp $success;
527 0         0 };
528 0         0 };
529 0         0 testSet("p", $l); # Prefix, term
530             IfEq
531             Then
532 0         0 {testSet("t", $r);
533             IfEq
534             Then
535 0         0 {new(2, "Prefix term");
536 0         0 Jmp $success;
537 0         0 };
538 0         0 };
539             # KeepFree $l, $r;
540 0         0 };
541              
542 0         0 ClearZF; # Failed to match anything
543 0         0 Jmp $end;
544              
545 0         0 SetLabel $success; # Successfully matched
546 0         0 SetZF;
547              
548 0         0 SetLabel $end; # End
549             } # reduce
550              
551             sub reduceMultiple($) #P Reduce existing operators on the stack.
552 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
553             K('count',99)->for(sub # An improbably high but finite number of reductions
554 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
555 0         0 reduce($priority);
556 0         0 Jne $end; # Keep going as long as reductions are possible
557 0         0 });
558             }
559              
560             sub accept_a() #P Assign.
561 0     0 1 0 {checkSet("t");
562 0         0 reduceMultiple 2;
563 0 0       0 PrintErrStringNL "accept a" if $debug;
564 0         0 pushElement;
565             }
566              
567             sub accept_b #P Open.
568 0     0 1 0 {checkSet("abdps");
569 0 0       0 PrintErrStringNL "accept b" if $debug;
570 0         0 pushElement;
571             }
572              
573             sub accept_B #P Closing parenthesis.
574 0     0 1 0 {checkSet("bst");
575 0 0       0 PrintErrStringNL "accept B" if $debug;
576 0         0 reduceMultiple 1;
577 0         0 pushElement;
578 0         0 reduceMultiple 1;
579 0         0 checkSet("bst");
580             }
581              
582             sub accept_d #P Infix but not assign or semi-colon.
583 0     0 1 0 {checkSet("t");
584 0 0       0 PrintErrStringNL "accept d" if $debug;
585 0         0 pushElement;
586             }
587              
588             sub accept_p #P Prefix.
589 0     0 1 0 {checkSet("abdps");
590 0 0       0 PrintErrStringNL "accept p" if $debug;
591 0         0 pushElement;
592             }
593              
594             sub accept_q #P Post fix.
                # Emit the code that accepts a postfix operator and immediately combines it with the term on top of the stack.
595 0     0 1 0 {checkSet("t"); # Presumably validates that the preceding item is a term - confirm against checkSet
596 0 0       0 PrintErrStringNL "accept q" if $debug; # Trace only when debugging
597             IfEq # Post fix operator applied to a term - tests the flags left by the code emitted above, presumably by checkSet - confirm
598             Then
599 0     0   0 {Pop $w1; # Take the top element off the stack
600 0         0 pushElement; # Push the postfix operator in its place
601 0         0 Push $w1; # Put the saved element back on top of the operator
602 0         0 new(2, "Postfix"); # Build a new term from the top two stack elements
603             }
604 0         0 }
605              
606             sub accept_s #P Semi colon.
                # Emit the code that accepts a semicolon, inserting an empty element first if the top of the stack is already a semicolon.
607 0     0 1 0 {checkSet("bst"); # Presumably the letters of the lexical items allowed to precede this one - confirm against checkSet
608 0 0       0 PrintErrStringNL "accept s" if $debug; # Trace only when debugging
609 0         0 Mov $w1, "[rsp]"; # Peek at the element on top of the stack
610 0         0 testSet("s", $w1); # Is the top element a semicolon?
611             IfEq # Insert an empty element between two consecutive semicolons
612             Then
613 0     0   0 {pushEmpty;
614 0         0 };
615 0         0 reduceMultiple 1; # Reduce pending operators at priority 1 before pushing the semicolon
616 0         0 pushElement; # Push the semicolon
617             }
618              
619             sub accept_v #P Variable.
                # Emit the code that accepts a variable, wraps it in a term and then folds any prefix operators directly beneath it into that term.
620 0     0 1 0 {checkSet("abdps"); # Presumably the letters of the lexical items allowed to precede this one - confirm against checkSet
621 0 0       0 PrintErrStringNL "accept v" if $debug; # Trace only when debugging
622 0         0 pushElement; # Push the variable
623 0         0 new(1, "Variable"); # Build a new term from the single element on top of the stack
624             V(count,99)->for(sub # Reduce prefix operators - bounded at 99 passes
625 0     0   0 {my ($index, $start, $next, $end) = @_; # Loop index variable and the start, next and end labels of the loop - only $end is used
626 0         0 checkStackHas 2; # Need at least two elements: a possible prefix operator and the term
627 0         0 Jl $end; # Fewer than two elements so nothing more to reduce
628 0         0 my ($l, $r) = ($w1, $w2); # Work registers for the two top most stack elements
629 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # Element just below the top of the stack
630 0         0 Mov $r, "[rsp+".(0*$ses)."]"; # Element on top of the stack - loaded but not tested in this loop
631 0         0 testSet("p", $l); # Is the lower element a prefix operator?
632 0         0 Jne $end; # Not a prefix operator so stop reducing
633 0         0 new(2, "Prefixed variable"); # Build a new term from the top two stack elements
634 0         0 });
635             }
636              
637             sub parseExpression() #P Parse the string of classified lexical items addressed by register $start of length $size. The resulting parse tree (if any) is returned in r15.
                # Outline of the emitted code: validate and accept the first character, loop over the remaining characters dispatching each new lexical item to its accept_* sub, check that the expression ends acceptably, strip trailing semicolons, perform the final reductions and pop the offset of the parse tree into r15.
                # An empty expression (size zero) jumps straight to the end label without touching r15.
638 0     0 1 0 {my $end = Label; # Exit label for the whole parse
639 0         0 my $eb = $element."b"; # Contains a byte from the item being parsed
640              
641 0         0 Cmp $size, 0; # Check for empty expression
642 0         0 Je $end;
643              
644 0         0 loadCurrentChar; # Load current character
645             ### Need test for ignorable white space as first character
646 0         0 testSet($firstSet, $element); # The first item must be a member of the first set
                # NOTE(review): the heredoc opener on the error( line below appears to have been truncated in this listing - confirm against the original source file.
647             IfNe
648             Then
649 0     0   0 {error(<
650             Expression must start with 'opening parenthesis', 'prefix
651             operator', 'semi-colon' or 'variable'.
652             END
653 0         0 };
654              
655 0         0 testSet("v", $element); # Single variable
656             IfEq
657             Then
658 0     0   0 {pushElement; # Push the variable and wrap it in a term
659 0         0 new(1, "accept initial variable");
660             },
661             Else
662 0     0   0 {testSet("s", $element); # Semi
663             IfEq
664             Then
665 0         0 {pushEmpty; # A leading semicolon is preceded by an empty element wrapped in a term
666 0         0 new(1, "accept initial semicolon");
667 0         0 };
668 0         0 pushElement; # Push the first element whatever it is
669 0         0 };
670              
671 0         0 Inc $index; # We have processed the first character above
672 0         0 Mov $prevChar, $element; # Initialize the previous lexical item
673              
674             For # Parse each utf32 character after it has been classified
675 0     0   0 {my ($start, $end, $next) = @_; # Start and end of the classification loop - note that these shadow the outer $end inside the loop body
676 0         0 loadCurrentChar; # Load current character
677              
678 0 0       0 PrintErrRegisterInHex $element if $debug;
679              
680 0         0 Cmp $eb, $WhiteSpace;
681 0         0 Je $next; # Ignore white space
682              
683 0         0 Cmp $eb, 1; # Brackets are singular but everything else can potentially be a plurality
684             IfGt
685             Then
686 0         0 {Cmp $prevChar."b", $eb; # Compare with previous element known not to be white space or a bracket
687 0         0 Je $next
688 0         0 };
689 0         0 Mov $prevChar, $element; # Save element to previous element now we know we are on a different element
690              
691 0         0 for my $l(sort keys $Lex->{lexicals}->%*) # Each possible lexical item after classification
692 0         0 {my $x = $Lex->{lexicals}{$l}{letter};
693 0 0       0 next unless $x; # Skip characters that do not have a letter defined for Tree::Term because the lexical items needed to layout a file of lexical items are folded down to the actual lexical items required to represent the language independent of the textual layout with white space.
694              
695 0         0 my $n = $Lex->{lexicals}{$l}{number};
696 0         0 Comment "Compare to $n for $l";
697 0         0 Cmp $eb, $n;
698              
                # NOTE(review): the string eval below does not check $@, so a letter with no matching accept_ sub, or an error raised while emitting the accept code, is silently ignored - confirm that this is intended before tightening it.
699             IfEq
700             Then
701 0         0 {eval "accept_$x"; # Emit the code of the accept sub named after the letter of this lexical item
702 0         0 Jmp $next
703 0         0 };
704             }
705 0         0 error("Unexpected lexical item"); # Not selected
706 0         0 } $index, $size;
707              
708 0         0 testSet($lastSet, $prevChar); # Last lexical element
709             IfNe # Incomplete expression
710             Then
711 0     0   0 {error("Incomplete expression");
712 0         0 };
713              
714             K('count', 99)->for(sub # Remove trailing semicolons if present
715 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
716 0         0 checkStackHas 2;
717 0         0 Jl $end; # Does not have two or more elements
718 0         0 Pop $w1;
719 0         0 testSet("s", $w1); # Check that the top most element is a semi colon
720             IfNe # Not a semi colon so put it back and finish the loop
721             Then
722 0         0 {Push $w1;
723 0         0 Jmp $end;
724 0         0 };
725 0         0 });
726              
727 0         0 reduceMultiple 1; # Final reductions
728              
729 0         0 checkStackHas 1;
730             IfNe # The stack does not hold exactly one element after the final reductions
731             Then
732 0     0   0 {error("Multiple expressions on stack");
733 0         0 };
734              
735 0         0 Pop r15; # The resulting parse tree
736 0         0 Shr r15, 32; # The offset of the resulting parse tree
737 0         0 SetLabel $end;
738             } # parseExpression
739              
740             sub MatchBrackets(@) #P Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
                # Parameters of the generated subroutine: address - address of the classified utf32 text, size - number of utf32 characters, fail - set to zero on success else to the bitwise complement of the index of the offending bracket, opens - set to the number of opening brackets seen.
                # The machine stack is used to remember the position and code of each opener that has not yet been closed; rsp is saved in rsi and restored on exit so whatever is left on the stack is discarded.
                # NOTE(review): openers that are still on the stack when the input ends are not reported through fail by this subroutine.
741 0     0 1 0 {my (@parameters) = @_; # Parameters
742 0 0       0 @_ >= 1 or confess "One or more parameters";
743              
744             my $s = Subroutine
745 0     0   0 {my ($p) = @_; # Parameters
746 0         0 Comment "Match brackets in utf32 text";
747              
748 0         0 my $finish = Label;
749 0         0 PushR xmm0, k7, r10, r11, r12, r13, r14, r15, rsi; # R15 current character index. r14 is the current classification. r13 the last classification code. r12 the stack depth. r11 the number of opening brackets found. r10 address of first utf32 character.
750              
751 0         0 Mov rsi, rsp; # Save stack location so we can use the stack to record the brackets we have found
752 0         0 ClearRegisters r11, r12, r14, r15; # Count the number of brackets and track the stack depth, index of each character. r14 must be cleared as well because only its low byte is loaded below while the whole register is compared, tested and pushed
753 0         0 K(three, 3)->setMaskFirst(k7); # These are the number of bytes that we are going to use for the offsets of brackets which limits the size of a program to 2**24 (about 16.7 million) utf32 characters
754 0         0 $$p{fail} ->getReg(r11); # Clear failure indicator
755 0         0 $$p{opens} ->getReg(r11); # Clear count of opens
756 0         0 $$p{address}->setReg(r10); # Address of first utf32 character
757 0         0 my $w = RegisterSize eax; # Size of a utf32 character
758              
759             $$p{size}->for(sub # Process each utf32 character in the block of memory
760 0         0 {my ($index, $start, $next, $end) = @_;
761 0         0 my $continue = Label;
762              
763 0         0 Mov r14b, "[r10+$w*r15+3]"; # Classification character - this writes the low byte of r14 only
764              
765 0         0 Cmp r14, 0x10; # First bracket
766 0         0 Jl $continue; # Less than first bracket
767 0         0 Cmp r14, 0x4f; # Last bracket
768 0         0 Jg $continue; # Greater than last bracket
769              
770 0         0 Test r14, 1; # Zero means that the bracket is an opener
771             IfZ sub # Save an opener then continue
772 0         0 {Push r15; # Save position in input
773 0         0 Push r14; # Save opening code
774 0         0 Inc r11; # Count number of opening brackets
775 0         0 Inc r12; # Number of brackets currently open
776 0         0 Jmp $continue;
777 0         0 };
778 0         0 Cmp r12, 1; # Check that there is a bracket to match on the stack
779             IfLt sub # Nothing on stack
780 0         0 {Not r15; # Bitwise complement of the offset at which the error occurred so that a failure at offset zero is still non zero
781 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
782 0         0 Jmp $finish; # Return
783 0         0 };
784 0         0 Mov r13, "[rsp]"; # Peek at the opening bracket code which is on top of the stack
785 0         0 Inc r13; # Expected closing bracket
786 0         0 Cmp r13, r14; # Check for match
787             IfNe sub # Mismatch
788 0         0 {Not r15; # Bitwise complement of the offset at which the error occurred so that a failure at offset zero is still non zero
789 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
790 0         0 Jmp $finish; # Return
791 0         0 };
792 0         0 Pop r13; # The closing bracket matches the opening bracket
793 0         0 Pop r13; # Offset of opener
794 0         0 Dec r12; # Close off bracket sequence
795 0         0 Vpbroadcastq xmm0, r15; # Load offset of closer
796 0         0 Vmovdqu8 "[r10+$w*r13]\{k7}", xmm0; # Save offset of closer in the low three bytes of the opener - the classification is left intact so we still know what kind of bracket we have
797 0         0 Vpbroadcastq xmm0, r13; # Load offset of opener
798 0         0 Vmovdqu8 "[r10+$w*r15]\{k7}", xmm0; # Save offset of opener in the low three bytes of the closer - the classification is left intact so we still know what kind of bracket we have
799 0         0 SetLabel $continue; # Continue with next character
800 0         0 Inc r15; # Next character
801 0         0 });
802              
803 0         0 SetLabel $finish;
804 0         0 Mov rsp, rsi; # Restore stack
805 0         0 $$p{opens}->getReg(r11); # Number of brackets opened
806 0         0 PopR;
807 0         0 } [qw(address size fail opens)], name => q(Unisyn::Parse::MatchBrackets);
808              
809 0         0 $s->call(@parameters);
810             } # MatchBrackets
811              
812             sub ClassifyNewLines(@) #P Scan input string looking for opportunities to convert new lines into semi colons.
                # Parameters of the generated subroutine: address - address of the classified utf32 text, size - number of utf32 characters.
                # Three indices walk the text: first, middle and current. When the middle character is white space whose alphabet position is a new line, the first character is a close bracket, suffix or variable and the current character is an open bracket, prefix or variable, then the lexical code of the middle character is rewritten to $NewLineSemiColon.
813 0     0 1 0 {my (@parameters) = @_; # Parameters
814 0 0       0 @_ >= 1 or confess "One or more parameters";
815              
816             my $s = Subroutine
817 0     0   0 {my ($p) = @_; # Parameters
818 0         0 my $current = r15; # Index of the current character
819 0         0 my $middle = r14; # Index of the middle character
820 0         0 my $first = r13; # Index of the first character
821 0         0 my $address = r12; # Address of input string
822 0         0 my $size = r11; # Length of input utf32 string
823 0         0 my($c1, $c2) = (r8."b", r9."b"); # Lexical codes being tested
824              
825 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
826              
827 0         0 $$p{address}->setReg($address); # Address of string
828 0         0 $$p{size} ->setReg($size); # Size of string
829 0         0 Mov $current, 2; Mov $middle, 1; Mov $first, 0;
  0         0  
  0         0  
830              
831             For # Each character in input string
832 0         0 {my ($start, $end, $next) = @_; # Start, end and next labels
833              
834              
835 0         0 getLexicalCode $c1, $address, $middle; # Lexical code of the middle character
836 0         0 Cmp $c1, $WhiteSpace;
837             IfEq
838             Then
839 0         0 {getAlpha $c1, $address, $middle; # Position of the middle character within its alphabet
840              
841 0         0 Cmp $c1, $asciiNewLine;
842             IfEq # Middle character is an insignificant new line and thus could be a semicolon
843             Then
844 0         0 {getLexicalCode $c1, $address, $first; # Lexical code of the first character of the sequence
845              
846             my sub makeSemiColon # Make a new line into a new line semicolon
847 0         0 {putLexicalCode $c2, $address, $middle, $NewLineSemiColon;
848             }
849              
850             my sub check_bpv # Make new line if followed by 'b', 'p' or 'v'
851 0         0 {getLexicalCode $c1, $address, $current; # Lexical code of the character after the new line
852 0         0 Cmp $c1, $OpenBracket;
853              
854             IfEq
855             Then
856 0         0 {makeSemiColon;
857             },
858             Else
859 0         0 {Cmp $c1, $prefix;
860             IfEq
861             Then
862 0         0 {makeSemiColon;
863             },
864             Else
865 0         0 {Cmp $c1, $variable;
866             IfEq
867             Then
868 0         0 {makeSemiColon;
869 0         0 };
870 0         0 };
871 0         0 };
872             }
873              
874 0         0 Cmp $c1, $CloseBracket; # Check first character of sequence
875             IfEq
876             Then
877 0         0 {check_bpv;
878             },
879             Else
880 0         0 {Cmp $c1, $suffix;
881             IfEq
882             Then
883 0         0 {check_bpv;
884             },
885             Else
886 0         0 {Cmp $c1, $variable;
887             IfEq
888             Then
889 0         0 {check_bpv;
890 0         0 };
891 0         0 };
892 0         0 };
893 0         0 };
894 0         0 };
895              
896 0         0 Mov $first, $middle; Mov $middle, $current; # Find next lexical item
  0         0  
897 0         0 getLexicalCode $c1, $address, $current; # Current lexical code
898 0         0 Mov $middle, $current; # NOTE(review): repeats the assignment made two statements earlier
899 0         0 Inc $current; # Next possible character
900             For
901 0         0 {my ($start, $end, $next) = @_;
902 0         0 getLexicalCode $c2, $address, $current; # Lexical code of next character
903 0         0 Cmp $c1, $c2;
904 0         0 Jne $end; # Terminate when we are in a different lexical item
905 0         0 } $current, $size;
906 0         0 } $current, $size;
907              
908 0         0 PopR;
909 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyNewLines);
910              
911 0         0 $s->call(@parameters);
912             } # ClassifyNewLines
913              
914             sub ClassifyWhiteSpace(@) #P Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
                # Parameters of the generated subroutine: address - address of the classified utf32 text, size - number of utf32 characters.
                # Two passes are emitted over the text. The first pass marks spaces and new lines that it considers significant by giving them the $WhiteSpace lexical code. The second pass inverts the marking: remaining ascii spaces and new lines become $WhiteSpace while the characters marked in the first pass become $Ascii.
                # The first pass exits the program if it finds an ascii space immediately before an ascii new line.
915 0     0 1 0 {my (@parameters) = @_; # Parameters
916 0 0       0 @_ >= 1 or confess "One or more parameters";
917              
918             my $s = Subroutine
919 0         0 {my ($p) = @_; # Parameters
920 0         0 my $eb = r15."b"; # Lexical type of current char
921 0         0 my $s = r14; # State of white space between 'a'
922 0         0 my $S = r13; # State of white space before 'a'
923 0         0 my $cb = r12."b"; # Actual character within alphabet
924 0         0 my $address = r11; # Address of input string
925 0         0 my $index = r10; # Index of current char
926 0         0 my ($w1, $w2) = (r8."b", r9."b"); # Temporary work registers
927              
928             my sub getAlpha($;$) # Load the position of a lexical item in its alphabet from the current character
929 0         0 {my ($register, $indexReg) = @_; # Register to load, optional index register
930 0   0     0 getAlpha $register, $address, ($indexReg // $index) # Supplied index or default - the operands were previously the other way round so that the supplied index register was never used because $index is always defined
931             };
932              
933             my sub getLexicalCode() # Load the lexical code of the current character in memory into the current character
934 0         0 {getLexicalCode $eb, $address, $index; # Always the current character
935             };
936              
937             my sub putLexicalCode($;$) # Put the specified lexical code into the current character in memory.
938 0         0 {my ($code, $indexReg) = @_; # Code, optional index register
939 0   0     0 putLexicalCode $w1, $address, ($indexReg//$index), $code;
940             };
941              
942 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
943              
944 0         0 $$p{address}->setReg($address); # Address of string
945 0         0 Mov $s, -1; Mov $S, -1; Mov $index, 0; # Initial states, position
  0         0  
  0         0  
946              
947             $$p{size}->for(sub # Each character in expression
948 0         0 {my ($indexVariable, $start, $next, $end) = @_;
949              
950 0         0 $indexVariable->setReg($index);
951 0         0 getLexicalCode; # Current lexical code
952              
953             AndBlock # Trap space before new line and detect new line after ascii
954 0         0 {my ($end, $start) = @_;
955 0         0 Cmp $index, 0; Je $end; # Start beyond the first character so we can look back one character.
  0         0  
956 0         0 Cmp $eb, $Ascii; Jne $end; # Current is ascii
  0         0  
957              
958 0         0 Mov $w1, "[$address+$indexScale*$index-$indexScale+$lexCodeOffset]"; # Previous lexical code
959 0         0 Cmp $w1, $Ascii; Jne $end; # Previous is ascii
  0         0  
960              
961 0         0 if (1) # Check for 's' followed by 'n' and 'a' followed by 'n'
962 0         0 {Mov $w1, "[$address+$indexScale*$index-$indexScale]"; # Previous character
963 0         0 getAlpha $w2; # Current character
964              
965 0         0 Cmp $w1, $asciiSpace; # Check for space followed by new line
966             IfEq
967             Then
968 0         0 {Cmp $w2, $asciiNewLine;
969             IfEq # Disallow 's' followed by 'n'
970             Then
971 0         0 {PrintErrStringNL "Space detected before new line at index:";
972 0         0 PrintErrRegisterInHex $index;
973 0         0 PrintErrTraceBack;
974 0         0 Exit(1);
975 0         0 };
976 0         0 };
977              
978 0         0 Cmp $w1, $asciiSpace; Je $end; # Previous is 's' so this is not 'a' followed by 'n'
  0         0  
979 0         0 Cmp $w1, $asciiNewLine; Je $end; # Previous is 'n' so this is not 'a' followed by 'n'
  0         0  
980 0         0 Cmp $w2, $asciiNewLine; Jne $end; # Current is 'n'
  0         0  
981              
982 0         0 putLexicalCode $WhiteSpace; # Mark new line as significant
983             }
984 0         0 };
985              
986             AndBlock # Spaces and new lines between other ascii
987 0         0 {my ($end, $start) = @_;
988 0         0 Cmp $s, -1;
989             IfEq # Looking for opening ascii
990             Then
991 0         0 {Cmp $eb, $Ascii; Jne $end; # Not ascii
  0         0  
992 0         0 getAlpha $cb; # Current character
993 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
994 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
995             IfEq # NOTE(review): the flags here are still those of the comparison with space above which has just been found to be not equal, so this body does not appear to be reachable - confirm whether the state should be set unconditionally
996             Then
997 0         0 {Mov $s, $index; Inc $s; # Ascii not space nor new line
  0         0  
998 0         0 };
999 0         0 Jmp $end;
1000             },
1001             Else # Looking for closing ascii
1002 0         0 {Cmp $eb, $Ascii;
1003             IfNe # Not ascii
1004             Then
1005 0         0 {Mov $s, -1;
1006 0         0 Jmp $end
1007 0         0 };
1008 0         0 getAlpha $cb; # Current character
1009 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
1010 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
1011              
1012             For # Move over spaces and new lines between two ascii characters that are neither of new line or space
1013 0         0 {my ($start, $end, $next) = @_;
1014 0         0 getAlpha $cb, $s; # Check for 's' or 'n' at the position held in the state register
1015 0         0 Cmp $cb, $asciiSpace;
1016             IfEq
1017             Then
1018 0         0 {putLexicalCode $WhiteSpace, $s; # Mark as significant white space.
1019 0         0 Jmp $next;
1020 0         0 };
1021 0         0 Cmp $cb, $asciiNewLine;
1022             IfEq
1023             Then
1024 0         0 {putLexicalCode $WhiteSpace; # Mark as significant new line - NOTE(review): no index register is supplied so this writes at the current index rather than at the position in the state register, unlike the space case above - confirm
1025 0         0 Jmp $next;
1026 0         0 };
1027 0         0 } $s, $index;
1028              
1029 0         0 Mov $s, $index; Inc $s;
  0         0  
1030 0         0 };
1031 0         0 };
1032              
1033             AndBlock # Note: 's' preceding 'a' are significant
1034 0         0 {my ($end, $start) = @_;
1035 0         0 Cmp $S, -1;
1036             IfEq # Looking for 's'
1037             Then
1038 0         0 {Cmp $eb, $Ascii; # Not 'a'
1039             IfNe
1040             Then
1041 0         0 {Mov $S, -1;
1042 0         0 Jmp $end
1043 0         0 };
1044 0         0 getAlpha $cb; # Actual character in alphabet
1045 0         0 Cmp $cb, $asciiSpace; # Space
1046             IfEq
1047             Then
1048 0         0 {Mov $S, $index;
1049 0         0 Jmp $end;
1050 0         0 };
1051             },
1052             Else # Looking for 'a'
1053 0         0 {Cmp $eb, $Ascii; # Not 'a'
1054             IfNe
1055             Then
1056 0         0 {Mov $S, -1;
1057 0         0 Jmp $end
1058 0         0 };
1059 0         0 getAlpha $cb; # Actual character in alphabet
1060 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip 's'
  0         0  
1061              
1062 0         0 Cmp $cb, $asciiNewLine;
1063             IfEq # New lines prevent 's' from preceding 'a'
1064             Then
1065 0         0 {Mov $s, -1; # NOTE(review): this resets the state used by the previous block while the state tested by this block is left unchanged - confirm which register was intended
1066 0         0 Jmp $end
1067 0         0 };
1068              
1069             For # Move over spaces to non space ascii
1070 0         0 {my ($start, $end, $next) = @_;
1071 0         0 putLexicalCode $WhiteSpace, $S; # Mark space preceding 'a' as significant
1072 0         0 } $S, $index;
1073 0         0 Mov $S, -1; # Look for next possible space
1074             }
1075 0         0 };
  0         0  
1076 0         0 });
1077              
1078             $$p{size}->for(sub # Invert white space so that significant white space becomes ascii and the remainder is ignored
1079 0         0 {my ($indexVariable, $start, $next, $end) = @_;
1080              
1081 0         0 $indexVariable->setReg($index);
1082 0         0 getLexicalCode; # Current lexical code
1083              
1084             AndBlock # Invert non significant white space
1085 0         0 {my ($end, $start) = @_;
1086 0         0 Cmp $eb, $Ascii;
1087 0         0 Jne $end; # Not ascii
1088              
1089 0         0 getAlpha $cb; # Actual character in alphabet
1090 0         0 Cmp $cb, $asciiSpace;
1091             IfEq
1092             Then
1093 0         0 {putLexicalCode $WhiteSpace; # Mark space as not significant
1094 0         0 Jmp $next;
1095 0         0 };
1096 0         0 Cmp $cb, $asciiNewLine;
1097             IfEq
1098             Then
1099 0         0 {putLexicalCode $WhiteSpace; # Mark new line as not significant
1100 0         0 Jmp $next;
1101 0         0 };
1102 0         0 };
1103              
1104             AndBlock # Mark significant white space
1105 0         0 {my ($end, $start) = @_;
1106 0         0 Cmp $eb, $WhiteSpace; Jne $end; # Not significant white space
  0         0  
1107 0         0 putLexicalCode $Ascii; # Mark as ascii
1108 0         0 };
1109 0         0 });
1110              
1111 0         0 PopR;
1112 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyWhiteSpace);
1113              
1114 0         0 $s->call(@parameters);
1115             } # ClassifyWhiteSpace
1116              
1117             sub reload($$) #P Reload the variables associated with a parse.
                # Re-points the quarks of the parse, and its operators if it has any, at the arena, array and tree supplied in the parameter hash.
                # NOTE(review): the check below accepts a single argument although the parameter hash is dereferenced unconditionally.
1118 0     0 1 0 {my ($parse, $parameters) = @_; # Parse, hash of variable parameters
1119 0 0       0 @_ >= 1 or confess "One or more parameters";
1120              
1121             $parse->quarks->reload (arena => $$parameters{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
1122             array => $$parameters{numbersToStringsFirst},
1123 0         0 tree => $$parameters{stringsToNumbersFirst});
1124              
1125             $parse->operators->reload(arena => $$parameters{bs}, # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
1126             array => $$parameters{opNumbersToStringsFirst},
1127 0 0       0 tree => $$parameters{opStringsToNumbersFirst}) if $parse->operators; # Only when operator methods were supplied
1128             }
1129              
1130             sub parseUtf8($@) #P Parse a unisyn expression encoded as utf8 and return the parse tree.
                # Stages emitted by the generated subroutine, in order: convert utf8 to utf32, classify characters into alphabets, classify brackets, match brackets, classify white space, classify new lines, parse the classified expression and save the offset of the parse tree in the parse parameter.
                # The definition of the generated subroutine is also saved in $ParseUtf8SubDef. The subroutine is then called with parameters taken from the parse description.
1131 0     0 1 0 {my ($parse, @parameters) = @_; # Parse, parameters
1132 0 0       0 @_ >= 1 or confess "One or more parameters";
1133              
1134             my $s = Subroutine
1135 0         0 {my ($p, $s) = @_; # Parameters
1136 0         0 $ParseUtf8SubDef = $s; # Save the sub definition globally so that we can forward its parameter list to L.
1137              
1138 0         0 $parse->reload($p); # Reload the parse description
1139 0 0       0 PrintErrStringNL "ParseUtf8" if $debug;
1140              
1141 0         0 PushR $parseStackBase, map {"r$_"} 8..15;
  0         0  
1142 0         0 PushZmm 0..1; PushMask 0..2; # Used to hold arena and classifiers. Zmm0 is used as a short string to quark the lexical item strings.
  0         0  
1143              
1144 0         0 my $source32 = $$p{source32};
1145 0         0 my $sourceSize32 = $$p{sourceSize32};
1146 0         0 my $sourceLength32 = $$p{sourceLength32};
1147              
1148             ConvertUtf8ToUtf32 u8 => $$p{address}, size8 => $$p{size}, # Convert to utf32
1149 0         0 u32 => $source32, size32 => $sourceSize32,
1150             count => $sourceLength32;
1151              
1152             my sub PrintUtf32($$) # Print a utf32 string in hexadecimal
1153 0         0 {my ($size, $address) = @_; # Variable size, variable address
1154 0         0 $address->printErrMemoryInHexNL($size);
1155             }
1156              
1157 0 0       0 if ($debug)
1158 0         0 {PrintErrStringNL "After conversion from utf8 to utf32";
1159 0         0 $sourceSize32 ->errNL("Output Length: "); # Write output length
1160 0         0 PrintUtf32($sourceSize32, $source32); # Print utf32
1161             }
1162              
1163 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{lexicalLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1164 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{lexicalHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1165              
1166 0         0 ClassifyWithInRangeAndSaveOffset address=>$source32, size=>$sourceLength32; # Alphabetic classification
1167 0 0       0 if ($debug)
1168 0         0 {PrintErrStringNL "After classification into alphabet ranges";
1169 0         0 PrintUtf32($sourceSize32, $source32); # Print classified utf32
1170             }
1171              
1172 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{bracketsLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1173 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{bracketsHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1174              
1175 0         0 ClassifyWithInRange address=>$source32, size=>$sourceLength32; # Bracket classification
1176 0 0       0 if ($debug)
1177 0         0 {PrintErrStringNL "After classification into brackets";
1178 0         0 PrintUtf32($sourceSize32, $source32); # Print classified brackets
1179             }
1180              
1181 0         0 my $opens = V(opens, -1); # Receives the number of opening brackets - not read again in this subroutine
1182 0         0 MatchBrackets address=>$source32, size=>$sourceLength32, $opens, $$p{fail}; # Match brackets - the last two arguments are passed as bare variables, presumably matched to parameters by variable name - confirm against the subroutine call mechanism
1183 0 0       0 if ($debug)
1184 0         0 {PrintErrStringNL "After bracket matching";
1185 0         0 PrintUtf32($sourceSize32, $source32); # Print matched brackets
1186             }
1187              
1188 0         0 ClassifyWhiteSpace address=>$source32, size=>$sourceLength32; # Classify white space
1189 0 0       0 if ($debug)
1190 0         0 {PrintErrStringNL "After white space classification";
1191 0         0 PrintUtf32($sourceSize32, $source32);
1192             }
1193              
1194 0         0 ClassifyNewLines address=>$source32, size=>$sourceLength32; # Classify new lines
1195 0 0       0 if ($debug)
1196 0         0 {PrintErrStringNL "After classifying new lines";
1197 0         0 PrintUtf32($sourceSize32, $source32);
1198             }
1199              
1200 0         0 $$p{source32} ->setReg($start); # Start of expression string after it has been classified
1201 0         0 $$p{sourceLength32}->setReg($size); # Number of characters in the expression
1202 0         0 Mov $parseStackBase, rsp; # Set base of parse stack
1203              
1204 0         0 parseExpression; # Parse the expression
1205              
1206 0         0 $$p{parse}->getReg(r15); # Save the content of r15, in which parseExpression leaves the offset of the parse tree
1207 0         0 Mov rsp, $parseStackBase; # Remove parse stack
1208              
1209 0 0       0 $$p{parse}->errNL if $debug;
1210              
1211 0         0 PopMask; PopZmm; PopR;
  0         0  
  0         0  
1212              
1213             }
1214 0         0 [qw(bs address size parse fail source32 sourceSize32 sourceLength32),
1215             qw(numbersToStringsFirst stringsToNumbersFirst),
1216             qw(opNumbersToStringsFirst opStringsToNumbersFirst)],
1217             name => q(Unisyn::Parse::parseUtf8);
1218              
1219 0         0 my $op = $parse->operators; # The operator methods if supplied
1220 0         0 my $zero = K(zero, 0); # Stand in for the operator quarks when no operators were supplied
1221              
1222 0 0       0 $s->call # Parameterize the parse
    0          
1223             (bs => $parse->arena->bs,
1224             address => $parse->address8,
1225             fail => $parse->fails,
1226             parse => $parse->parse,
1227             size => $parse->size8,
1228             source32 => $parse->source32,
1229             sourceLength32 => $parse->sourceLength32,
1230             sourceSize32 => $parse->sourceSize32,
1231             numbersToStringsFirst => $parse->quarks->numbersToStrings->first,
1232             stringsToNumbersFirst => $parse->quarks->stringsToNumbers->first,
1233             opNumbersToStringsFirst => $op ? $op->subQuarks->numbersToStrings->first : $zero,
1234             opStringsToNumbersFirst => $op ? $op->subQuarks->stringsToNumbers->first : $zero,
1235             );
1236             } # parseUtf8
1237              
1238             #D1 Traverse # Traverse the parse tree
1239              
1240             sub traverseParseTree($) # Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
1241 0     0 1 0 {my ($parse) = @_; # Parse tree
1242              
1243             my $s = Subroutine # Traverse one term: recurse into every operand that is itself a term, then dispatch the subroutine recorded for this term
1244 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1245 0         0 my $t = Nasm::X86::DescribeTree (arena=>$$p{bs}, first=>$$p{first}); # Describe the tree for this term using the bs and first parameters
1246 0         0 $t->find(K(key, $opType)); # The lexical type of the element - normally a term
1247              
1248             If $t->found == 0, # Not found lexical type of element
1249             Then
1250 0         0 {PrintOutString "No type for node";
1251 0         0 Exit(1);
1252 0         0 };
1253              
1254             If $t->data != $term, # Expected a term
1255             Then
1256 0         0 {PrintOutString "Expected a term";
1257 0         0 Exit(1);
1258 0         0 };
1259              
1260 0         0 my $operands = V(operands); # Number of operands
1261 0         0 $t->find(K(key, 1)); # Key 1 tells us the number of operands
1262             If $t->found > 0, # Found key 1
1263             Then
1264 0         0 {$operands->copy($t->data); # Number of operands
1265             },
1266             Else
1267 0         0 {PrintOutString "Expected at least one operand";
1268 0         0 Exit(1);
1269 0         0 };
1270              
1271             $operands->for(sub # Each operand
1272 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1273 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1274 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1275 0         0 $t->find($i+$lexItemOffset); my $off = V(key) ->copy($t->data); # Offset of first block of sub tree
  0         0  
1276              
1277             If $lex == $term, # Term
1278             Then
1279 0         0 {$s->call($$p{bs}, first => $off); # Traverse sub tree referenced by offset field
1280 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1281             },
1282 0         0 });
  0         0  
1283              
1284 0         0 $t->find(K(key, $opSub)); # The subroutine for the term
1285             If $t->found > 0, # Found subroutine for term
1286             Then # Call subroutine for this term
1287             {#PushR r15, zmm0;
1288             my $p = Subroutine # Prototype subroutine to establish parameter list
1289 0         0 {} [qw(tree)], with => $s,
1290             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::prototype";
1291              
1292             my $d = Subroutine # Dispatcher
1293 0         0 {my ($parameters, $sub) = @_;
1294 0         0 $p->dispatchV($t->data, r15); # Dispatch through the value stored under the $opSub key; r15 is handed to dispatchV - presumably as a work register, confirm against Nasm::X86
1295 0         0 } [], with => $p,
1296             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::dispatch";
1297              
1298 0         0 $d->call(tree => $t->first); # Run the dispatcher, supplying the first block of this term as the tree parameter
1299              
1300             # my $p = Subroutine # Subroutine
1301             # {my ($parameters) = @_; # Parameters
1302             # $$parameters{call}->setReg(r15);
1303             # Call r15;
1304             # } [qw(tree call)], with => $s,
1305             # name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem";
1306             #
1307             # my $l = RegisterSize rax;
1308             # $$p{bs} ->putQIntoZmm(0, 0*$l, r15);
1309             # $$p{first}->putQIntoZmm(0, 1*$l, r15);
1310             # $t->data ->setReg(r15);
1311             # Call r15;
1312             # #PopR;
1313 0         0 };
1314              
1315 0         0 } [qw(bs first)], name => "Nasm::X86::Tree::traverseParseTree"; # Parameters of the traversal subroutine: bs and first
1316              
1317 0         0 PushR r15, zmm0; # Save the registers named here around the traversal
1318 0         0 $s->call($parse->arena->bs, first => $parse->parse); # Start the traversal at the tree held in $parse->parse
1319 0         0 PopR; # Restore the saved registers
1320              
1321 0         0 $a # NOTE(review): $a is never declared in this sub, so this evaluates the package sort variable rather than a traversal result - presumably $parse was intended, confirm
1322             } # traverseParseTree
1323              
1324             #D1 Print # Print a parse tree
1325              
1326             sub printLexicalItem($$$$) #P Print the utf8 string corresponding to a lexical item at a variable offset.
1327 0     0 1 0 {my ($parse, $source32, $offset, $size) = @_; # Parse tree, address of utf32 source representation, offset to lexical item in utf32, size in utf32 chars of item
1328 0         0 my $t = $parse->arena->DescribeTree; # NOTE(review): not referenced again in this sub - confirm whether it is still needed
1329              
1330             my $s = Subroutine
1331 0     0   0 {my ($p, $s) = @_; # Parameters
1332 0         0 PushR r12, r13, r14, r15;
1333              
1334 0         0 $$p{source32}->setReg(r14);
1335 0         0 $$p{offset} ->setReg(r15);
1336 0         0 Lea r13, "[r14+4*r15]"; # Address lexical item
1337 0         0 Mov eax, "[r13]"; # First lexical item clearing rax
1338 0         0 Shr rax, 24; # First lexical item type in lowest byte and all else cleared
1339              
1340 0         0 my $success = Label; # Jumped to once the item has been printed
1341 0         0 my $print = Label; # Jumped to once the alphabet for the item has been chosen
1342              
1343 0         0 Cmp rax, $bracketsBase; # Test for brackets
1344             IfGe
1345             Then
1346 0         0 {my $o = $Lex->{bracketsOpen}; # Opening brackets
1347 0         0 my $c = $Lex->{bracketsClose}; # Closing brackets
1348 0         0 my $O = Rutf8 map {($_, chr(0))} @$o; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1349 0         0 my $C = Rutf8 map {($_, chr(0))} @$c; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1350 0         0 Mov r14, $O; # Address open bracket
1351 0         0 Mov r15, rax; # The bracket number
1352 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Index to bracket: entry (bracket number - bracketsBase - 1) of the 4 byte wide table
1353 0         0 PrintOutUtf8Char; # Print opening bracket
1354 0         0 Mov r14, $C; # Address close bracket
1355 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Closing brackets occupy 3 bytes
1356 0         0 PrintOutUtf8Char; # Print closing bracket
1357 0         0 Jmp $success;
1358 0         0 };
1359              
1360 0         0 Mov r12, -1; # Alphabet to use
1361 0         0 Cmp rax, $variable; # Test for variable
1362             IfEq
1363             Then
1364 0         0 {my $b = $Lex->{alphabetsOrdered}{variable}; # Load variable alphabet in dwords
1365 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1366 0         0 my $a = Rd @b;
1367 0         0 Mov r12, $a;
1368 0         0 Jmp $print;
1369 0         0 };
1370              
1371 0         0 Cmp rax, $assign; # Assign operator
1372             IfEq
1373             Then
1374 0         0 {my $b = $Lex->{alphabetsOrdered}{assign};
1375 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1376 0         0 my $a = Rd @b;
1377 0         0 Mov r12, $a;
1378 0         0 Jmp $print;
1379 0         0 };
1380              
1381 0         0 Cmp rax, $dyad; # Dyad
1382             IfEq
1383             Then
1384 0         0 {my $b = $Lex->{alphabetsOrdered}{dyad};
1385 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1386 0         0 my $a = Rd @b;
1387 0         0 Mov r12, $a;
1388 0         0 Jmp $print;
1389 0         0 };
1390              
1391 0         0 Cmp rax, $Ascii; # Ascii
1392             IfEq
1393             Then
1394 0         0 {my $b = $Lex->{alphabetsOrdered}{Ascii};
1395 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1396 0         0 my $a = Rd @b;
1397 0         0 Mov r12, $a;
1398 0         0 Jmp $print;
1399 0         0 };
1400              
1401 0         0 Cmp rax, $prefix; # Prefix
1402             IfEq
1403             Then
1404 0         0 {my $b = $Lex->{alphabetsOrdered}{prefix};
1405 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1406 0         0 my $a = Rd @b;
1407 0         0 Mov r12, $a;
1408 0         0 Jmp $print;
1409 0         0 };
1410              
1411 0         0 Cmp rax, $suffix; # Suffix
1412             IfEq
1413             Then
1414 0         0 {my $b = $Lex->{alphabetsOrdered}{suffix};
1415 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1416 0         0 my $a = Rd @b;
1417 0         0 Mov r12, $a;
1418 0         0 Jmp $print;
1419 0         0 };
1420              
1421 0         0 PrintErrTraceBack; # Unknown lexical type
1422 0         0 PrintErrStringNL "Alphabet not found for unexpected lexical item";
1423 0         0 PrintErrRegisterInHex rax;
1424 0         0 Exit(1);
1425              
1426 0         0 SetLabel $print; # Decoded
1427              
1428             $$p{size}->for(sub # Write out each character of the lexical item in turn
1429 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1430 0         0 $index->setReg(r14); # Index of the character within the lexical item
1431 0         0 ClearRegisters r15; # Next instruction does not clear the entire register
1432 0         0 Mov r15b, "[r13+4*r14]"; # Load the low byte of the utf32 entry for this character: the offset of the letter within its alphabet
1433 0         0 Shl r15, 2; # Each letter is 4 bytes wide in utf8
1434 0         0 Lea rax, "[r12+r15]"; # Address alphabet letter as utf8
1435 0         0 PrintOutUtf8Char; # Print utf8 character
1436 0         0 });
1437              
1438 0         0 SetLabel $success; # Done
1439              
1440 0         0 PopR;
1441 0         0 } [qw(offset source32 size)],
1442             name => q(Unisyn::Parse::printLexicalItem);
1443              
1444 0         0 $s->call(offset => $offset, source32 => $source32, size => $size); # Print the item described by the caller's variables
1445             }
1446              
1447             sub print($) # Print a parse tree.
1448 0     0 1 0 {my ($parse) = @_; # Parse tree
1449 0         0 my $t = $parse->arena->DescribeTree; # Tree descriptor shared by every level of the recursive print
1450              
1451 0         0 PushR my ($depthR) = (r12); # Recursion depth
1452              
1453             my $b = Subroutine # Print the spacing blanks to offset sub trees
1454             {V(loop, $depthR)->for(sub # One string per level of depth held in the depth register
1455 0         0 {PrintOutString " ";
1456 0     0   0 });
1457 0         0 } [], name => "Nasm::X86::Tree::dump::spaces";
1458              
1459             my $s = Subroutine # Print a tree
1460 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1461              
1462 0         0 my $B = $$p{bs}; # Kept so that it can be passed on to the recursive calls
1463              
1464 0         0 $t->address->copy($$p{bs}); # Point the tree descriptor at the supplied bs parameter
1465 0         0 $t->first ->copy($$p{first}); # Position the descriptor on the first block of this term
1466 0         0 $t->find(K(key, 0)); # Key 0 tells us the type of the element - normally a term
1467              
1468             If $t->found == 0, # Not found key 0
1469             Then
1470 0         0 {PrintOutString "No type for node";
1471 0         0 Exit(1);
1472 0         0 };
1473              
1474             If $t->data != $term, # Expected a term
1475             Then
1476 0         0 {PrintOutString "Expected a term";
1477 0         0 Exit(1);
1478 0         0 };
1479              
1480 0         0 my $operands = V(operands); # Number of operands
1481 0         0 $t->find(K(key, 1)); # Key 1 tells us the number of operands
1482             If $t->found > 0, # Found key 1
1483             Then
1484 0         0 {$operands->copy($t->data); # Number of operands
1485             },
1486             Else
1487 0         0 {PrintOutString "Expected at least one operand";
1488 0         0 Exit(1);
1489 0         0 };
1490              
1491             $operands->for(sub # Each operand
1492 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1493 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1494 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1495 0         0 $t->find($i+$lexItemOffset); my $off = V(data)->copy($t->data); # Offset in source
  0         0  
1496 0         0 $t->find($i+$lexItemLength); my $len = V(data)->copy($t->data); # Length in source
  0         0  
1497              
1498 0         0 $b->call; # Indent
1499              
1500             If $lex == $term, # Term
1501             Then
1502 0         0 {PrintOutStringNL "Term";
1503 0         0 Inc $depthR; # Increase indentation for sub terms
1504 0         0 $s->call($B, first => $off, $$p{source32}); # Print sub tree referenced by offset field
1505 0         0 Dec $depthR; # Restore existing indentation
1506 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1507             },
1508              
1509 0         0 Ef {$lex == $semiColon} # Semicolon
1510             Then
1511 0         0 {PrintOutStringNL "Semicolon";
1512             },
1513              
1514             Else # Every other lexical type: print a label followed by the source text of the item
1515             {If $lex == $variable, # Variable
1516             Then
1517 0         0 {PrintOutString "Variable: ";
1518             },
1519              
1520 0         0 Ef {$lex == $assign} # Assign
1521             Then
1522 0         0 {PrintOutString "Assign: ";
1523             },
1524              
1525 0         0 Ef {$lex == $prefix} # Prefix
1526             Then
1527 0         0 {PrintOutString "Prefix: ";
1528             },
1529              
1530 0         0 Ef {$lex == $suffix} # Suffix
1531             Then
1532 0         0 {PrintOutString "Suffix: ";
1533             },
1534              
1535 0         0 Ef {$lex == $dyad} # Dyad
1536             Then
1537 0         0 {PrintOutString "Dyad: ";
1538             },
1539              
1540 0         0 Ef {$lex == $Ascii} # Ascii
1541             Then
1542 0         0 {PrintOutString "Ascii: ";
1543             },
1544              
1545             Else # Brackets
1546 0         0 {PrintOutString "Brackets: ";
1547 0         0 };
1548              
1549 0         0 $parse->printLexicalItem($$p{source32}, $off, $len); # Print the source text of the lexical item, whatever its type
1550 0         0 PrintOutNL;
1551 0         0 };
1552              
1553             If $index == 0, # Operator followed by indented operands
1554             Then
1555 0         0 {Inc $depthR;
1556 0         0 };
1557 0         0 });
1558              
1559 0         0 Dec $depthR; # Reset indentation after operands
1560 0         0 } [qw(bs first source32)], name => "Nasm::X86::Tree::print";
1561              
1562 0         0 ClearRegisters $depthR; # Depth starts at zero
1563              
1564 0         0 $s->call($parse->arena->bs, first => $parse->parse, $parse->source32); # Print starting from the tree held in $parse->parse
1565              
1566 0         0 PopR; # Restore the register saved for the depth
1567             } # print
1568              
1569             sub dumpParseTree($) # Dump the parse tree.
1570 0     0 1 0 {my ($parse) = @_; # Parse tree
1571 0         0 my $t = $parse->arena->DescribeTree; # Tree descriptor in the arena of the parse
1572 0         0 $t->first->copy($parse->parse); # Position the descriptor on the tree held in $parse->parse
1573 0         0 $t->dump; # Dump the tree from that position
1574             }
1575              
1576             #D1 Execute # Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
1577              
1578             sub lexToSub($$$$) # Map a lexical item to a processing subroutine.
1579 0     0 1 0 {my ($parse, $alphabet, $op, $sub) = @_; # Parse tree whose operators receive the entry, the alphabet name used as a key into the lexical data, the operator name in that alphabet, subroutine definition
1580 0         0 my $a = &lexicalData->{alphabetsOrdered}{$alphabet}; # Alphabet
1581 0         0 my $n = $$Lex{lexicals}{$alphabet}{number}; # Number of lexical type
1582 0         0 my %i = map {$$a[$_]=>$_} keys @$a; # Each entry of the alphabet mapped to its position in the alphabet
  0         0  
1583 0         0 my @b = ($n, map {$i{ord $_}} split //, $op); # Bytes representing the operator name
  0         0  
1584 0         0 my $s = join '', map {chr $_} @b; # String representation
  0         0  
1585 0         0 $parse->operators->put($s, $sub); # Add the string, subroutine combination to the sub quarks
1586             }
1587              
1588             sub dyad($$$) # Define a method for a dyadic operator.
1589 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree whose operators receive the entry, the name of the operator as a utf8 string, associated subroutine definition
1590 0         0 $parse->lexToSub("dyad", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1591             }
1592              
1593             sub assign($$$) # Define a method for an assign operator.
1594 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree whose operators receive the entry, the name of the operator as a utf8 string, associated subroutine definition
1595 0         0 $parse->lexToSub("assign", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1596             }
1597              
1598             sub prefix($$$) # Define a method for a prefix operator.
1599 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree whose operators receive the entry, the name of the operator as a utf8 string, associated subroutine definition
1600 0         0 $parse->lexToSub("prefix", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1601             }
1602              
1603             sub suffix($$$) # Define a method for a suffix operator.
1604 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree whose operators receive the entry, the name of the operator as a utf8 string, associated subroutine definition
1605 0         0 my $alphabet = "suffix"; # Suffix operators are keyed by their text in the suffix alphabet, in the same way as dyad, assign and prefix, so that the entry for variables is left alone
1606 0         0 $parse->lexToSub($alphabet, $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1607             }
1608              
1609              
1610             sub ascii($$) # Define a method for ascii text.
1611 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the entry, associated subroutine definition
1612 0         0 my $n = $$Lex{lexicals}{Ascii}{number}; # Lexical number of ascii
1613 0         0 $parse->operators->put(chr($n), $sub); # Add the ascii subroutine to the sub quarks
1614             }
1615              
1616             sub semiColon($$) # Define a method for the semicolon operator. The same subroutine is registered for both the semicolon and the new line semicolon lexical types.
1617 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the entries, associated subroutine definition
1618 0         0 my $n = $$Lex{lexicals}{semiColon}{number}; # Lexical number of semicolon
1619 0         0 $parse->operators->put(chr($n), $sub); # Add the semicolon subroutine to the sub quarks
1620 0         0 my $N = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semi colon
1621 0         0 $parse->operators->put(chr($N), $sub); # Add the same subroutine to the sub quarks under the new line semicolon number
1622             }
1623              
1624             sub variable($$) # Define a method for a variable.
1625 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the entry, associated subroutine definition
1626 0         0 my $n = $$Lex{lexicals}{variable}{number}; # Lexical number of a variable
1627 0         0 $parse->operators->put(chr($n), $sub); # Add the variable subroutine to the sub quarks
1628             }
1629              
1630             sub bracket($$$) # Define a method for a bracket operator. Confesses if the opening bracket is not one of the known opening brackets.
1631 0     0 1 0 {my ($parse, $open, $sub) = @_; # Parse tree whose operators receive the entry, opening parenthesis, associated subroutine
1632 0         0 my $l = &lexicalData; # Lexical data, read here for bracketsOpen and bracketsBase
1633 0         0 my $s = join '', sort $l->{bracketsOpen}->@*;#, $l->{bracketsClose}->@*; # Bracket alphabet
1634 0         0 my $b = index($s, $open); # Position of the opening bracket in the sorted bracket alphabet, -1 if absent
1635 0 0       0 $b < 0 and confess "No such bracket: $open";
1636 0         0 my $n = $$Lex{lexicals}{OpenBracket}{number}; # Lexical number of open bracket
1637 0         0 $parse->operators->put(chr($n).chr($b+1+$l->{bracketsBase}), $sub); # Why plus one? # Add the brackets subroutine to the sub quarks # NOTE(review): printLexicalItem indexes its bracket tables with (type - bracketsBase - 1), which suggests bracket types are numbered from bracketsBase+1 - confirm
1638             }
1639              
1640             #D1 Alphabets # Translate between alphabets.
1641              
1642             sub showAlphabet($) #P Show an alphabet.
1643 0     0 1 0 {my ($alphabet) = @_; # Alphabet name
1644 0         0 my $out; # Accumulates the characters of the alphabet; remains undefined if the alphabet has no entries
1645 0         0 my $lex = &lexicalData; # Lexical data containing the ordered alphabets
1646 0         0 my $abc = $lex->{alphabetsOrdered}{$alphabet}; # Code points of the named alphabet in order
1647 0         0 for my $a(@$abc) # Each code point
1648 0         0 {$out .= chr($a); # Append the corresponding character
1649             }
1650             $out # The alphabet as a string is the result of the sub
1651 0         0 }
1652              
1653             sub asciiToAssignLatin($) # Translate ascii to the corresponding letters in the assign latin alphabet.
1654 0     0 1 0 {my ($in) = @_; # A string of ascii
1655 1     1   15290 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
  1         3  
  1         15  
  0         0  
1656             }
1657              
1658             sub asciiToAssignGreek($) # Translate ascii to the corresponding letters in the assign greek alphabet.
1659 0     0 1 0 {my ($in) = @_; # A string of ascii
1660 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1661             }
1662              
1663             sub asciiToDyadLatin($) # Translate ascii to the corresponding letters in the dyad latin alphabet.
1664 0     0 1 0 {my ($in) = @_; # A string of ascii
1665 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1666             }
1667              
1668             sub asciiToDyadGreek($) # Translate ascii to the corresponding letters in the dyad greek alphabet.
1669 0     0 1 0 {my ($in) = @_; # A string of ascii
1670 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1671             }
1672              
1673             sub asciiToPrefixLatin($) # Translate ascii to the corresponding letters in the prefix latin alphabet.
1674 0     0 1 0 {my ($in) = @_; # A string of ascii
1675 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1676             }
1677              
1678             sub asciiToPrefixGreek($) # Translate ascii to the corresponding letters in the prefix greek alphabet.
1679 0     0 1 0 {my ($in) = @_; # A string of ascii
1680 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1681             }
1682              
1683             sub asciiToSuffixLatin($) # Translate ascii to the corresponding letters in the suffix latin alphabet.
1684 0     0 1 0 {my ($in) = @_; # A string of ascii
1685 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1686             }
1687              
1688             sub asciiToSuffixGreek($) # Translate ascii to the corresponding letters in the suffix greek alphabet.
1689 0     0 1 0 {my ($in) = @_; # A string of ascii
1690 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1691             }
1692              
1693             sub asciiToVariableLatin($) # Translate ascii to the corresponding letters in the variable latin alphabet.
1694 0     0 1 0 {my ($in) = @_; # A string of ascii
1695 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1696             }
1697              
1698             sub asciiToVariableGreek($) # Translate ascii to the corresponding letters in the variable greek alphabet.
1699 0     0 1 0 {my ($in) = @_; # A string of ascii
1700 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈/r; # The r modifier yields the translated copy as the result of the sub and leaves $in unchanged; characters outside the search list pass through as they are
1701             }
1702              
1703             sub asciiToEscaped($) # Translate ascii to the corresponding letters in the escaped ascii alphabet.
1704 0     0 1 0 {my ($in) = @_; # A string of ascii
1705 0         0 $in =~ tr/abcdefghijklmnopqrstuvwxyz/🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩/r; # Only lower case a to z are in the search list, every other character passes through as it is; the r modifier yields the translated copy as the result of the sub
1706             }
1707              
1708             sub semiColonChar() # The semicolon character: code point 10210 (U+27E2), the single entry of the semiColon alphabet in the lexical data.
1709             {chr(10210)
1710             }
1711              
1712             #d
1713 1     1 0 1 sub lexicalData {do {
1714 1         310 my $a = bless({
1715             alphabetRanges => 14,
1716             alphabets => {
1717             "circledLatinLetter" => "\x{24B6}\x{24B7}\x{24B8}\x{24B9}\x{24BA}\x{24BB}\x{24BC}\x{24BD}\x{24BE}\x{24BF}\x{24C0}\x{24C1}\x{24C2}\x{24C3}\x{24C4}\x{24C5}\x{24C6}\x{24C7}\x{24C8}\x{24C9}\x{24CA}\x{24CB}\x{24CC}\x{24CD}\x{24CE}\x{24CF}\x{24D0}\x{24D1}\x{24D2}\x{24D3}\x{24D4}\x{24D5}\x{24D6}\x{24D7}\x{24D8}\x{24D9}\x{24DA}\x{24DB}\x{24DC}\x{24DD}\x{24DE}\x{24DF}\x{24E0}\x{24E1}\x{24E2}\x{24E3}\x{24E4}\x{24E5}\x{24E6}\x{24E7}\x{24E8}\x{24E9}",
1718             "mathematicalBold" => "\x{1D400}\x{1D401}\x{1D402}\x{1D403}\x{1D404}\x{1D405}\x{1D406}\x{1D407}\x{1D408}\x{1D409}\x{1D40A}\x{1D40B}\x{1D40C}\x{1D40D}\x{1D40E}\x{1D40F}\x{1D410}\x{1D411}\x{1D412}\x{1D413}\x{1D414}\x{1D415}\x{1D416}\x{1D417}\x{1D418}\x{1D419}\x{1D41A}\x{1D41B}\x{1D41C}\x{1D41D}\x{1D41E}\x{1D41F}\x{1D420}\x{1D421}\x{1D422}\x{1D423}\x{1D424}\x{1D425}\x{1D426}\x{1D427}\x{1D428}\x{1D429}\x{1D42A}\x{1D42B}\x{1D42C}\x{1D42D}\x{1D42E}\x{1D42F}\x{1D430}\x{1D431}\x{1D432}\x{1D433}\x{1D6A8}\x{1D6A9}\x{1D6AA}\x{1D6AB}\x{1D6AC}\x{1D6AD}\x{1D6AE}\x{1D6AF}\x{1D6B0}\x{1D6B1}\x{1D6B2}\x{1D6B3}\x{1D6B4}\x{1D6B5}\x{1D6B6}\x{1D6B7}\x{1D6B8}\x{1D6B9}\x{1D6BA}\x{1D6BB}\x{1D6BC}\x{1D6BD}\x{1D6BE}\x{1D6BF}\x{1D6C0}\x{1D6C1}\x{1D6C2}\x{1D6C3}\x{1D6C4}\x{1D6C5}\x{1D6C6}\x{1D6C7}\x{1D6C8}\x{1D6C9}\x{1D6CA}\x{1D6CB}\x{1D6CC}\x{1D6CD}\x{1D6CE}\x{1D6CF}\x{1D6D0}\x{1D6D1}\x{1D6D2}\x{1D6D3}\x{1D6D4}\x{1D6D5}\x{1D6D6}\x{1D6D7}\x{1D6D8}\x{1D6D9}\x{1D6DA}\x{1D6DB}\x{1D6DC}\x{1D6DD}\x{1D6DE}\x{1D6DF}\x{1D6E0}\x{1D6E1}",
1719             "mathematicalBoldFraktur" => "\x{1D56C}\x{1D56D}\x{1D56E}\x{1D56F}\x{1D570}\x{1D571}\x{1D572}\x{1D573}\x{1D574}\x{1D575}\x{1D576}\x{1D577}\x{1D578}\x{1D579}\x{1D57A}\x{1D57B}\x{1D57C}\x{1D57D}\x{1D57E}\x{1D57F}\x{1D580}\x{1D581}\x{1D582}\x{1D583}\x{1D584}\x{1D585}\x{1D586}\x{1D587}\x{1D588}\x{1D589}\x{1D58A}\x{1D58B}\x{1D58C}\x{1D58D}\x{1D58E}\x{1D58F}\x{1D590}\x{1D591}\x{1D592}\x{1D593}\x{1D594}\x{1D595}\x{1D596}\x{1D597}\x{1D598}\x{1D599}\x{1D59A}\x{1D59B}\x{1D59C}\x{1D59D}\x{1D59E}\x{1D59F}",
1720             "mathematicalBoldItalic" => "\x{1D468}\x{1D469}\x{1D46A}\x{1D46B}\x{1D46C}\x{1D46D}\x{1D46E}\x{1D46F}\x{1D470}\x{1D471}\x{1D472}\x{1D473}\x{1D474}\x{1D475}\x{1D476}\x{1D477}\x{1D478}\x{1D479}\x{1D47A}\x{1D47B}\x{1D47C}\x{1D47D}\x{1D47E}\x{1D47F}\x{1D480}\x{1D481}\x{1D482}\x{1D483}\x{1D484}\x{1D485}\x{1D486}\x{1D487}\x{1D488}\x{1D489}\x{1D48A}\x{1D48B}\x{1D48C}\x{1D48D}\x{1D48E}\x{1D48F}\x{1D490}\x{1D491}\x{1D492}\x{1D493}\x{1D494}\x{1D495}\x{1D496}\x{1D497}\x{1D498}\x{1D499}\x{1D49A}\x{1D49B}\x{1D71C}\x{1D71D}\x{1D71E}\x{1D71F}\x{1D720}\x{1D721}\x{1D722}\x{1D723}\x{1D724}\x{1D725}\x{1D726}\x{1D727}\x{1D728}\x{1D729}\x{1D72A}\x{1D72B}\x{1D72C}\x{1D72D}\x{1D72E}\x{1D72F}\x{1D730}\x{1D731}\x{1D732}\x{1D733}\x{1D734}\x{1D735}\x{1D736}\x{1D737}\x{1D738}\x{1D739}\x{1D73A}\x{1D73B}\x{1D73C}\x{1D73D}\x{1D73E}\x{1D73F}\x{1D740}\x{1D741}\x{1D742}\x{1D743}\x{1D744}\x{1D745}\x{1D746}\x{1D747}\x{1D748}\x{1D749}\x{1D74A}\x{1D74B}\x{1D74C}\x{1D74D}\x{1D74E}\x{1D74F}\x{1D750}\x{1D751}\x{1D752}\x{1D753}\x{1D754}\x{1D755}",
1721             "mathematicalBoldScript" => "\x{1D4D0}\x{1D4D1}\x{1D4D2}\x{1D4D3}\x{1D4D4}\x{1D4D5}\x{1D4D6}\x{1D4D7}\x{1D4D8}\x{1D4D9}\x{1D4DA}\x{1D4DB}\x{1D4DC}\x{1D4DD}\x{1D4DE}\x{1D4DF}\x{1D4E0}\x{1D4E1}\x{1D4E2}\x{1D4E3}\x{1D4E4}\x{1D4E5}\x{1D4E6}\x{1D4E7}\x{1D4E8}\x{1D4E9}\x{1D4EA}\x{1D4EB}\x{1D4EC}\x{1D4ED}\x{1D4EE}\x{1D4EF}\x{1D4F0}\x{1D4F1}\x{1D4F2}\x{1D4F3}\x{1D4F4}\x{1D4F5}\x{1D4F6}\x{1D4F7}\x{1D4F8}\x{1D4F9}\x{1D4FA}\x{1D4FB}\x{1D4FC}\x{1D4FD}\x{1D4FE}\x{1D4FF}\x{1D500}\x{1D501}\x{1D502}\x{1D503}",
1722             "mathematicalDouble-struck" => "\x{1D538}\x{1D539}\x{1D53B}\x{1D53C}\x{1D53D}\x{1D53E}\x{1D540}\x{1D541}\x{1D542}\x{1D543}\x{1D544}\x{1D546}\x{1D54A}\x{1D54B}\x{1D54C}\x{1D54D}\x{1D54E}\x{1D54F}\x{1D550}\x{1D552}\x{1D553}\x{1D554}\x{1D555}\x{1D556}\x{1D557}\x{1D558}\x{1D559}\x{1D55A}\x{1D55B}\x{1D55C}\x{1D55D}\x{1D55E}\x{1D55F}\x{1D560}\x{1D561}\x{1D562}\x{1D563}\x{1D564}\x{1D565}\x{1D566}\x{1D567}\x{1D568}\x{1D569}\x{1D56A}\x{1D56B}",
1723             "mathematicalFraktur" => "\x{1D504}\x{1D505}\x{1D507}\x{1D508}\x{1D509}\x{1D50A}\x{1D50D}\x{1D50E}\x{1D50F}\x{1D510}\x{1D511}\x{1D512}\x{1D513}\x{1D514}\x{1D516}\x{1D517}\x{1D518}\x{1D519}\x{1D51A}\x{1D51B}\x{1D51C}\x{1D51E}\x{1D51F}\x{1D520}\x{1D521}\x{1D522}\x{1D523}\x{1D524}\x{1D525}\x{1D526}\x{1D527}\x{1D528}\x{1D529}\x{1D52A}\x{1D52B}\x{1D52C}\x{1D52D}\x{1D52E}\x{1D52F}\x{1D530}\x{1D531}\x{1D532}\x{1D533}\x{1D534}\x{1D535}\x{1D536}\x{1D537}",
1724             "mathematicalItalic" => "\x{1D434}\x{1D435}\x{1D436}\x{1D437}\x{1D438}\x{1D439}\x{1D43A}\x{1D43B}\x{1D43C}\x{1D43D}\x{1D43E}\x{1D43F}\x{1D440}\x{1D441}\x{1D442}\x{1D443}\x{1D444}\x{1D445}\x{1D446}\x{1D447}\x{1D448}\x{1D449}\x{1D44A}\x{1D44B}\x{1D44C}\x{1D44D}\x{1D44E}\x{1D44F}\x{1D450}\x{1D451}\x{1D452}\x{1D453}\x{1D454}\x{1D456}\x{1D457}\x{1D458}\x{1D459}\x{1D45A}\x{1D45B}\x{1D45C}\x{1D45D}\x{1D45E}\x{1D45F}\x{1D460}\x{1D461}\x{1D462}\x{1D463}\x{1D464}\x{1D465}\x{1D466}\x{1D467}\x{1D6E2}\x{1D6E3}\x{1D6E4}\x{1D6E5}\x{1D6E6}\x{1D6E7}\x{1D6E8}\x{1D6E9}\x{1D6EA}\x{1D6EB}\x{1D6EC}\x{1D6ED}\x{1D6EE}\x{1D6EF}\x{1D6F0}\x{1D6F1}\x{1D6F2}\x{1D6F3}\x{1D6F4}\x{1D6F5}\x{1D6F6}\x{1D6F7}\x{1D6F8}\x{1D6F9}\x{1D6FA}\x{1D6FB}\x{1D6FC}\x{1D6FD}\x{1D6FE}\x{1D6FF}\x{1D700}\x{1D701}\x{1D702}\x{1D703}\x{1D704}\x{1D705}\x{1D706}\x{1D707}\x{1D708}\x{1D709}\x{1D70A}\x{1D70B}\x{1D70C}\x{1D70D}\x{1D70E}\x{1D70F}\x{1D710}\x{1D711}\x{1D712}\x{1D713}\x{1D714}\x{1D715}\x{1D716}\x{1D717}\x{1D718}\x{1D719}\x{1D71A}\x{1D71B}",
1725             "mathematicalMonospace" => "\x{1D670}\x{1D671}\x{1D672}\x{1D673}\x{1D674}\x{1D675}\x{1D676}\x{1D677}\x{1D678}\x{1D679}\x{1D67A}\x{1D67B}\x{1D67C}\x{1D67D}\x{1D67E}\x{1D67F}\x{1D680}\x{1D681}\x{1D682}\x{1D683}\x{1D684}\x{1D685}\x{1D686}\x{1D687}\x{1D688}\x{1D689}\x{1D68A}\x{1D68B}\x{1D68C}\x{1D68D}\x{1D68E}\x{1D68F}\x{1D690}\x{1D691}\x{1D692}\x{1D693}\x{1D694}\x{1D695}\x{1D696}\x{1D697}\x{1D698}\x{1D699}\x{1D69A}\x{1D69B}\x{1D69C}\x{1D69D}\x{1D69E}\x{1D69F}\x{1D6A0}\x{1D6A1}\x{1D6A2}\x{1D6A3}",
1726             "mathematicalSans-serif" => "\x{1D5A0}\x{1D5A1}\x{1D5A2}\x{1D5A3}\x{1D5A4}\x{1D5A5}\x{1D5A6}\x{1D5A7}\x{1D5A8}\x{1D5A9}\x{1D5AA}\x{1D5AB}\x{1D5AC}\x{1D5AD}\x{1D5AE}\x{1D5AF}\x{1D5B0}\x{1D5B1}\x{1D5B2}\x{1D5B3}\x{1D5B4}\x{1D5B5}\x{1D5B6}\x{1D5B7}\x{1D5B8}\x{1D5B9}\x{1D5BA}\x{1D5BB}\x{1D5BC}\x{1D5BD}\x{1D5BE}\x{1D5BF}\x{1D5C0}\x{1D5C1}\x{1D5C2}\x{1D5C3}\x{1D5C4}\x{1D5C5}\x{1D5C6}\x{1D5C7}\x{1D5C8}\x{1D5C9}\x{1D5CA}\x{1D5CB}\x{1D5CC}\x{1D5CD}\x{1D5CE}\x{1D5CF}\x{1D5D0}\x{1D5D1}\x{1D5D2}\x{1D5D3}",
1727             "mathematicalSans-serifBold" => "\x{1D5D4}\x{1D5D5}\x{1D5D6}\x{1D5D7}\x{1D5D8}\x{1D5D9}\x{1D5DA}\x{1D5DB}\x{1D5DC}\x{1D5DD}\x{1D5DE}\x{1D5DF}\x{1D5E0}\x{1D5E1}\x{1D5E2}\x{1D5E3}\x{1D5E4}\x{1D5E5}\x{1D5E6}\x{1D5E7}\x{1D5E8}\x{1D5E9}\x{1D5EA}\x{1D5EB}\x{1D5EC}\x{1D5ED}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{1D5F1}\x{1D5F2}\x{1D5F3}\x{1D5F4}\x{1D5F5}\x{1D5F6}\x{1D5F7}\x{1D5F8}\x{1D5F9}\x{1D5FA}\x{1D5FB}\x{1D5FC}\x{1D5FD}\x{1D5FE}\x{1D5FF}\x{1D600}\x{1D601}\x{1D602}\x{1D603}\x{1D604}\x{1D605}\x{1D606}\x{1D607}\x{1D756}\x{1D757}\x{1D758}\x{1D759}\x{1D75A}\x{1D75B}\x{1D75C}\x{1D75D}\x{1D75E}\x{1D75F}\x{1D760}\x{1D761}\x{1D762}\x{1D763}\x{1D764}\x{1D765}\x{1D766}\x{1D767}\x{1D768}\x{1D769}\x{1D76A}\x{1D76B}\x{1D76C}\x{1D76D}\x{1D76E}\x{1D76F}\x{1D770}\x{1D771}\x{1D772}\x{1D773}\x{1D774}\x{1D775}\x{1D776}\x{1D777}\x{1D778}\x{1D779}\x{1D77A}\x{1D77B}\x{1D77C}\x{1D77D}\x{1D77E}\x{1D77F}\x{1D780}\x{1D781}\x{1D782}\x{1D783}\x{1D784}\x{1D785}\x{1D786}\x{1D787}\x{1D788}\x{1D789}\x{1D78A}\x{1D78B}\x{1D78C}\x{1D78D}\x{1D78E}\x{1D78F}",
1728             "mathematicalSans-serifBoldItalic" => "\x{1D63C}\x{1D63D}\x{1D63E}\x{1D63F}\x{1D640}\x{1D641}\x{1D642}\x{1D643}\x{1D644}\x{1D645}\x{1D646}\x{1D647}\x{1D648}\x{1D649}\x{1D64A}\x{1D64B}\x{1D64C}\x{1D64D}\x{1D64E}\x{1D64F}\x{1D650}\x{1D651}\x{1D652}\x{1D653}\x{1D654}\x{1D655}\x{1D656}\x{1D657}\x{1D658}\x{1D659}\x{1D65A}\x{1D65B}\x{1D65C}\x{1D65D}\x{1D65E}\x{1D65F}\x{1D660}\x{1D661}\x{1D662}\x{1D663}\x{1D664}\x{1D665}\x{1D666}\x{1D667}\x{1D668}\x{1D669}\x{1D66A}\x{1D66B}\x{1D66C}\x{1D66D}\x{1D66E}\x{1D66F}\x{1D790}\x{1D791}\x{1D792}\x{1D793}\x{1D794}\x{1D795}\x{1D796}\x{1D797}\x{1D798}\x{1D799}\x{1D79A}\x{1D79B}\x{1D79C}\x{1D79D}\x{1D79E}\x{1D79F}\x{1D7A0}\x{1D7A1}\x{1D7A2}\x{1D7A3}\x{1D7A4}\x{1D7A5}\x{1D7A6}\x{1D7A7}\x{1D7A8}\x{1D7A9}\x{1D7AA}\x{1D7AB}\x{1D7AC}\x{1D7AD}\x{1D7AE}\x{1D7AF}\x{1D7B0}\x{1D7B1}\x{1D7B2}\x{1D7B3}\x{1D7B4}\x{1D7B5}\x{1D7B6}\x{1D7B7}\x{1D7B8}\x{1D7B9}\x{1D7BA}\x{1D7BB}\x{1D7BC}\x{1D7BD}\x{1D7BE}\x{1D7BF}\x{1D7C0}\x{1D7C1}\x{1D7C2}\x{1D7C3}\x{1D7C4}\x{1D7C5}\x{1D7C6}\x{1D7C7}\x{1D7C8}\x{1D7C9}",
1729             "mathematicalSans-serifItalic" => "\x{1D608}\x{1D609}\x{1D60A}\x{1D60B}\x{1D60C}\x{1D60D}\x{1D60E}\x{1D60F}\x{1D610}\x{1D611}\x{1D612}\x{1D613}\x{1D614}\x{1D615}\x{1D616}\x{1D617}\x{1D618}\x{1D619}\x{1D61A}\x{1D61B}\x{1D61C}\x{1D61D}\x{1D61E}\x{1D61F}\x{1D620}\x{1D621}\x{1D622}\x{1D623}\x{1D624}\x{1D625}\x{1D626}\x{1D627}\x{1D628}\x{1D629}\x{1D62A}\x{1D62B}\x{1D62C}\x{1D62D}\x{1D62E}\x{1D62F}\x{1D630}\x{1D631}\x{1D632}\x{1D633}\x{1D634}\x{1D635}\x{1D636}\x{1D637}\x{1D638}\x{1D639}\x{1D63A}\x{1D63B}",
1730             "mathematicalScript" => "\x{1D49C}\x{1D49E}\x{1D49F}\x{1D4A2}\x{1D4A5}\x{1D4A6}\x{1D4A9}\x{1D4AA}\x{1D4AB}\x{1D4AC}\x{1D4AE}\x{1D4AF}\x{1D4B0}\x{1D4B1}\x{1D4B2}\x{1D4B3}\x{1D4B4}\x{1D4B5}\x{1D4B6}\x{1D4B7}\x{1D4B8}\x{1D4B9}\x{1D4BB}\x{1D4BD}\x{1D4BE}\x{1D4BF}\x{1D4C0}\x{1D4C1}\x{1D4C2}\x{1D4C3}\x{1D4C5}\x{1D4C6}\x{1D4C7}\x{1D4C8}\x{1D4C9}\x{1D4CA}\x{1D4CB}\x{1D4CC}\x{1D4CD}\x{1D4CE}\x{1D4CF}",
1731             "negativeCircledLatinLetter" => "\x{1F150}\x{1F151}\x{1F152}\x{1F153}\x{1F154}\x{1F155}\x{1F156}\x{1F157}\x{1F158}\x{1F159}\x{1F15A}\x{1F15B}\x{1F15C}\x{1F15D}\x{1F15E}\x{1F15F}\x{1F160}\x{1F161}\x{1F162}\x{1F163}\x{1F164}\x{1F165}\x{1F166}\x{1F167}\x{1F168}\x{1F169}",
1732             "negativeSquaredLatinLetter" => "\x{1F170}\x{1F171}\x{1F172}\x{1F173}\x{1F174}\x{1F175}\x{1F176}\x{1F177}\x{1F178}\x{1F179}\x{1F17A}\x{1F17B}\x{1F17C}\x{1F17D}\x{1F17E}\x{1F17F}\x{1F180}\x{1F181}\x{1F182}\x{1F183}\x{1F184}\x{1F185}\x{1F186}\x{1F187}\x{1F188}\x{1F189}",
1733             "planck" => "\x{210E}",
1734             "semiColon" => "\x{27E2}",
1735             "squaredLatinLetter" => "\x{1F130}\x{1F131}\x{1F132}\x{1F133}\x{1F134}\x{1F135}\x{1F136}\x{1F137}\x{1F138}\x{1F139}\x{1F13A}\x{1F13B}\x{1F13C}\x{1F13D}\x{1F13E}\x{1F13F}\x{1F140}\x{1F141}\x{1F142}\x{1F143}\x{1F144}\x{1F145}\x{1F146}\x{1F147}\x{1F148}\x{1F149}\x{1F1A5}",
1736             },
1737             alphabetsOrdered => {
1738             Ascii => [0 .. 127, 127312 .. 127337],
1739             assign => [8462, 119860 .. 119911, 120546 .. 120603],
1740             dyad => [119808 .. 119859, 120488 .. 120545],
1741             prefix => [119912 .. 119963, 120604 .. 120661],
1742             semiColon => [10210],
1743             suffix => [120380 .. 120431, 120720 .. 120777],
1744             variable => [120276 .. 120327, 120662 .. 120719],
1745             },
1746             brackets => 16,
1747             bracketsBase => 16,
1748             bracketsClose => [
1749             "\x{2309}",
1750             "\x{230B}",
1751             "\x{232A}",
1752             "\x{2769}",
1753             "\x{276B}",
1754             "\x{276D}",
1755             "\x{276F}",
1756             "\x{2771}",
1757             "\x{2773}",
1758             "\x{2775}",
1759             "\x{27E7}",
1760             "\x{27E9}",
1761             "\x{27EB}",
1762             "\x{27ED}",
1763             "\x{27EF}",
1764             "\x{2984}",
1765             "\x{2986}",
1766             "\x{2988}",
1767             "\x{298A}",
1768             "\x{298C}",
1769             "\x{298E}",
1770             "\x{2990}",
1771             "\x{2992}",
1772             "\x{2994}",
1773             "\x{2996}",
1774             "\x{2998}",
1775             "\x{29FD}",
1776             "\x{2E29}",
1777             "\x{3009}",
1778             "\x{300B}",
1779             "\x{3011}",
1780             "\x{3015}",
1781             "\x{3017}",
1782             "\x{3019}",
1783             "\x{301B}",
1784             "\x{FD3F}",
1785             "\x{FF09}",
1786             "\x{FF60}",
1787             ],
1788             bracketsHigh => [
1789             "0x1300230b",
1790             "0x1500232a",
1791             "0x23002775",
1792             "0x2d0027ef",
1793             "0x43002998",
1794             "0x450029fd",
1795             "0x47002e29",
1796             "0x4b00300b",
1797             "0x4d003011",
1798             "0x5500301b",
1799             "0x5700fd3f",
1800             "0x5900ff09",
1801             "0x5b00ff60",
1802             0,
1803             0,
1804             0,
1805             ],
1806             bracketsLow => [
1807             "0x10002308",
1808             "0x14002329",
1809             "0x16002768",
1810             "0x240027e6",
1811             "0x2e002983",
1812             "0x440029fc",
1813             "0x46002e28",
1814             "0x48003008",
1815             "0x4c003010",
1816             "0x4e003014",
1817             "0x5600fd3e",
1818             "0x5800ff08",
1819             "0x5a00ff5f",
1820             0,
1821             0,
1822             0,
1823             ],
1824             bracketsOpen => [
1825             "\x{2308}",
1826             "\x{230A}",
1827             "\x{2329}",
1828             "\x{2768}",
1829             "\x{276A}",
1830             "\x{276C}",
1831             "\x{276E}",
1832             "\x{2770}",
1833             "\x{2772}",
1834             "\x{2774}",
1835             "\x{27E6}",
1836             "\x{27E8}",
1837             "\x{27EA}",
1838             "\x{27EC}",
1839             "\x{27EE}",
1840             "\x{2983}",
1841             "\x{2985}",
1842             "\x{2987}",
1843             "\x{2989}",
1844             "\x{298B}",
1845             "\x{298D}",
1846             "\x{298F}",
1847             "\x{2991}",
1848             "\x{2993}",
1849             "\x{2995}",
1850             "\x{2997}",
1851             "\x{29FC}",
1852             "\x{2E28}",
1853             "\x{3008}",
1854             "\x{300A}",
1855             "\x{3010}",
1856             "\x{3014}",
1857             "\x{3016}",
1858             "\x{3018}",
1859             "\x{301A}",
1860             "\x{FD3E}",
1861             "\x{FF08}",
1862             "\x{FF5F}",
1863             ],
1864             lexicalAlpha => {
1865             "" => [
1866             "circledLatinLetter",
1867             "mathematicalBoldFraktur",
1868             "mathematicalBoldScript",
1869             "mathematicalDouble-struck",
1870             "mathematicalFraktur",
1871             "mathematicalMonospace",
1872             "mathematicalSans-serif",
1873             "mathematicalSans-serifItalic",
1874             "mathematicalScript",
1875             "negativeSquaredLatinLetter",
1876             "semiColon",
1877             "squaredLatinLetter",
1878             ],
1879             "Ascii" => ["negativeCircledLatinLetter"],
1880             "assign" => ["mathematicalItalic", "planck"],
1881             "CloseBracket" => [],
1882             "dyad" => ["mathematicalBold"],
1883             "OpenBracket" => [],
1884             "prefix" => ["mathematicalBoldItalic"],
1885             "semiColon" => [],
1886             "suffix" => ["mathematicalSans-serifBoldItalic"],
1887             "term" => [],
1888             "variable" => ["mathematicalSans-serifBold"],
1889             },
1890             lexicalHigh => [
1891             127,
1892             8462,
1893             10210,
1894             119859,
1895             16897127,
1896             119963,
1897             120327,
1898             120431,
1899             872535777,
1900             889313051,
1901             872535893,
1902             872535951,
1903             872536009,
1904             2147610985,
1905             0,
1906             0,
1907             ],
1908             lexicalLow => [
1909             33554432,
1910             83894542,
1911             134227938,
1912             50451456,
1913             84005940,
1914             67228776,
1915             100783572,
1916             117560892,
1917             50452136,
1918             84006626,
1919             67229468,
1920             100783958,
1921             117561232,
1922             33681744,
1923             0,
1924             0,
1925             ],
1926             lexicals => bless({
1927             Ascii => bless({ letter => "a", like => "v", name => "Ascii", number => 2 }, "Unisyn::Parse::Lexical::Constant"),
1928             assign => bless({ letter => "a", like => "a", name => "assign", number => 5 }, "Unisyn::Parse::Lexical::Constant"),
1929             CloseBracket => bless({ letter => "B", like => "B", name => "CloseBracket", number => 1 }, "Unisyn::Parse::Lexical::Constant"),
1930             dyad => bless({ letter => "d", like => "d", name => "dyad", number => 3 }, "Unisyn::Parse::Lexical::Constant"),
1931             empty => bless({ letter => "e", like => "e", name => "empty", number => 10 }, "Unisyn::Parse::Lexical::Constant"),
1932             NewLineSemiColon => bless({ letter => "N", like => undef, name => "NewLineSemiColon", number => 12 }, "Unisyn::Parse::Lexical::Constant"),
1933             OpenBracket => bless({ letter => "b", like => "b", name => "OpenBracket", number => 0 }, "Unisyn::Parse::Lexical::Constant"),
1934             prefix => bless({ letter => "p", like => "p", name => "prefix", number => 4 }, "Unisyn::Parse::Lexical::Constant"),
1935             semiColon => bless({ letter => "s", like => "s", name => "semiColon", number => 8 }, "Unisyn::Parse::Lexical::Constant"),
1936             suffix => bless({ letter => "q", like => "q", name => "suffix", number => 7 }, "Unisyn::Parse::Lexical::Constant"),
1937             term => bless({ letter => "t", like => "t", name => "term", number => 9 }, "Unisyn::Parse::Lexical::Constant"),
1938             variable => bless({ letter => "v", like => "v", name => "variable", number => 6 }, "Unisyn::Parse::Lexical::Constant"),
1939             WhiteSpace => bless({ letter => "W", like => undef, name => "WhiteSpace", number => 11 }, "Unisyn::Parse::Lexical::Constant"),
1940             }, "Unisyn::Parse::Lexicals"),
1941             sampleLexicals => {
1942             A => [
1943             100663296,
1944             83886080,
1945             33554497,
1946             33554464,
1947             33554497,
1948             33554464,
1949             33554464,
1950             33554464,
1951             33554464,
1952             ],
1953             Adv => [
1954             100663296,
1955             83886080,
1956             33554497,
1957             33554464,
1958             33554497,
1959             33554464,
1960             33554464,
1961             33554464,
1962             33554464,
1963             50331648,
1964             100663296,
1965             ],
1966             BB => [
1967             0,
1968             0,
1969             0,
1970             0,
1971             0,
1972             0,
1973             0,
1974             0,
1975             100663296,
1976             16777216,
1977             16777216,
1978             16777216,
1979             16777216,
1980             16777216,
1981             16777216,
1982             16777216,
1983             16777216,
1984             ],
1985             brackets => [
1986             100663296,
1987             83886080,
1988             0,
1989             0,
1990             0,
1991             100663296,
1992             16777216,
1993             16777216,
1994             50331648,
1995             0,
1996             100663296,
1997             16777216,
1998             16777216,
1999             134217728,
2000             ],
2001             bvB => [0, 100663296, 16777216],
2002             nosemi => [
2003             100663296,
2004             83886080,
2005             0,
2006             0,
2007             0,
2008             100663296,
2009             16777216,
2010             16777216,
2011             50331648,
2012             0,
2013             100663296,
2014             16777216,
2015             16777216,
2016             ],
2017             ppppvdvdvqqqq => [
2018             0,
2019             0,
2020             0,
2021             100663296,
2022             83886080,
2023             100663296,
2024             50331648,
2025             0,
2026             100663296,
2027             50331648,
2028             100663296,
2029             16777216,
2030             134217728,
2031             100663296,
2032             83886080,
2033             100663296,
2034             50331648,
2035             100663296,
2036             16777216,
2037             16777216,
2038             16777216,
2039             ],
2040             s => [100663296, 134217728, 100663296],
2041             s1 => [
2042             100663296,
2043             83886080,
2044             33554442,
2045             33554464,
2046             33554464,
2047             33554497,
2048             33554442,
2049             33554464,
2050             33554464,
2051             33554464,
2052             ],
2053             v => [100663296],
2054             vav => [100663296, 83886080, 100663296],
2055             vavav => [100663296, 83886080, 100663296, 83886080, 100663296],
2056             vnsvs => [
2057             100663296,
2058             33554442,
2059             33554464,
2060             33554464,
2061             33554464,
2062             100663296,
2063             33554464,
2064             33554464,
2065             33554464,
2066             ],
2067             vnv => [100663296, 33554442, 100663296],
2068             vnvs => [
2069             100663296,
2070             33554442,
2071             100663296,
2072             33554464,
2073             33554464,
2074             33554464,
2075             33554464,
2076             ],
2077             ws => [
2078             100663296,
2079             83886080,
2080             0,
2081             0,
2082             0,
2083             100663296,
2084             16777216,
2085             16777216,
2086             50331648,
2087             0,
2088             100663296,
2089             16777216,
2090             16777216,
2091             134217728,
2092             100663296,
2093             83886080,
2094             0,
2095             100663296,
2096             50331648,
2097             100663296,
2098             16777216,
2099             134217728,
2100             ],
2101             wsa => [
2102             100663296,
2103             83886080,
2104             0,
2105             0,
2106             0,
2107             100663296,
2108             16777216,
2109             16777216,
2110             50331648,
2111             0,
2112             100663296,
2113             16777216,
2114             16777216,
2115             134217728,
2116             100663296,
2117             83886080,
2118             33554497,
2119             50331648,
2120             100663296,
2121             134217728,
2122             ],
2123             },
2124             sampleText => {
2125             A => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 ",
2126             Adv => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 \x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D603}\x{1D5EE}\x{1D5FF}",
2127             BB => "\x{230A}\x{2329}\x{2768}\x{276A}\x{276C}\x{276E}\x{2770}\x{2772}\x{1D5EE}\x{2773}\x{2771}\x{276F}\x{276D}\x{276B}\x{2769}\x{232A}\x{230B}",
2128             brackets => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}",
2129             bvB => "\x{2329}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{232A}",
2130             nosemi => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}",
2131             ppppvdvdvqqqq => "\x{1D482}\x{2774}\x{1D483}\x{27E6}\x{1D484}\x{27E8}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}\x{1D485}\x{1D5EF}\x{1D659}\x{1D42D}\x{1D422}\x{1D426}\x{1D41E}\x{1D42C}\x{27EA}\x{1D5F0}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F1}\x{27EB}\x{27E2}\x{1D5F2}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{1D5F3}\x{1D42C}\x{1D42E}\x{1D41B}\x{1D5F4}\x{1D65D}\x{27E9}\x{1D658}\x{27E7}\x{1D657}\x{2775}\x{1D656}",
2132             s => "\x{1D5EE}\x{27E2}\x{1D5EF}",
2133             s1 => "\x{1D5EE}\x{1D44E}\n \n ",
2134             v => "\x{1D5EE}",
2135             vav => "\x{1D5EE}\x{1D44E}\x{1D5EF}",
2136             vavav => "\x{1D5EE}\x{1D44E}\x{1D5EF}\x{1D44E}\x{1D5F0}",
2137             vnsvs => "\x{1D5EE}\x{1D5EE}\n \x{1D5EF}\x{1D5EF} ",
2138             vnv => "\x{1D5EE}\n\x{1D5EF}",
2139             vnvs => "\x{1D5EE}\n\x{1D5EF} ",
2140             ws => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{276C}\x{1D5EF}\x{1D5EF}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{276D}\x{27E2}",
2141             wsa => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}some--ascii--text\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{27E2}",
2142             },
2143             semiColon => "\x{27E2}",
2144             separator => "\x{205F}",
2145             structure => bless({
2146             codes => bless({
2147             a => bless({
2148             letter => "a",
2149             name => "assignment operator",
2150             next => "bpv",
2151             short => "assign",
2152             }, "Tree::Term::LexicalCode"),
2153             b => bless({
2154             letter => "b",
2155             name => "opening parenthesis",
2156             next => "bBpsv",
2157             short => "OpenBracket",
2158             }, "Tree::Term::LexicalCode"),
2159             B => bless({
2160             letter => "B",
2161             name => "closing parenthesis",
2162             next => "aBdqs",
2163             short => "CloseBracket",
2164             }, "Tree::Term::LexicalCode"),
2165             d => bless({ letter => "d", name => "dyadic operator", next => "bpv", short => "dyad" }, "Tree::Term::LexicalCode"),
2166             p => bless({ letter => "p", name => "prefix operator", next => "bpv", short => "prefix" }, "Tree::Term::LexicalCode"),
2167             q => bless({
2168             letter => "q",
2169             name => "suffix operator",
2170             next => "aBdqs",
2171             short => "suffix",
2172             }, "Tree::Term::LexicalCode"),
2173             s => bless({ letter => "s", name => "semi-colon", next => "bBpsv", short => "semiColon" }, "Tree::Term::LexicalCode"),
2174             t => bless({ letter => "t", name => "term", next => "aBdqs", short => "term" }, "Tree::Term::LexicalCode"),
2175             v => bless({ letter => "v", name => "variable", next => "aBdqs", short => "variable" }, "Tree::Term::LexicalCode"),
2176             }, "Tree::Term::Codes"),
2177             first => "bpsv",
2178             last => "Bqsv",
2179             }, "Tree::Term::LexicalStructure"),
2180             treeTermLexicals => 'fix',
2181             }, "Unisyn::Parse::Lexical::Tables");
2182 1         5 $a->{treeTermLexicals} = $a->{structure}{codes};
2183 1         4 $a;
2184             }}
2185              
2186             #-------------------------------------------------------------------------------
2187             # Export - eeee
2188             #-------------------------------------------------------------------------------
2189              
2190 1     1   10924 use Exporter qw(import);
  1         2  
  1         69  
2191              
2192 1     1   7 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         2  
  1         478  
2193              
2194             @ISA = qw(Exporter); # Inherit from Exporter
2195             @EXPORT = qw(); # Nothing is exported by default
2196             @EXPORT_OK = qw(); # Nothing is exportable on request either
2197             %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); # The :all tag is the union of the two lists above, which is currently empty
2198              
2199             # podDocumentation
2200             =pod
2201              
2202             =encoding utf-8
2203              
2204             =head1 Name
2205              
2206             Unisyn::Parse - Parse a Unisyn expression.
2207              
2208             =head1 Synopsis
2209              
2210             Parse the B<Unisyn> expression:
2211              
2212             𝒂 ❴ 𝒃 ⟦𝒄⟨ 𝗮 𝑒𝑞𝑢𝑎𝑙𝑠 𝒅 𝗯 𝙙 𝐭𝐢𝐦𝐞𝐬 ⟪𝗰 𝐩𝐥𝐮𝐬 𝗱⟫⟢ 𝗲 𝑎𝑠𝑠𝑖𝑔𝑛 𝗳 𝐬𝐮𝐛 𝗴 𝙝⟩ 𝙘 ⟧ 𝙗 ❵ 𝙖
2213              
2214             To get:
2215              
2216             Suffix: 𝙖
2217             Term
2218             Prefix: 𝒂
2219             Term
2220             Brackets: ⦇⦈
2221             Term
2222             Term
2223             Suffix: 𝙗
2224             Term
2225             Prefix: 𝒃
2226             Term
2227             Brackets: ⦋⦌
2228             Term
2229             Term
2230             Suffix: 𝙘
2231             Term
2232             Prefix: 𝒄
2233             Term
2234             Brackets: ⦏⦐
2235             Term
2236             Term
2237             Semicolon
2238             Term
2239             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2240             Term
2241             Variable: 𝗮
2242             Term
2243             Dyad: 𝐭𝐢𝐦𝐞𝐬
2244             Term
2245             Suffix: 𝙙
2246             Term
2247             Prefix: 𝒅
2248             Term
2249             Variable: 𝗯
2250             Term
2251             Brackets: ⦓⦔
2252             Term
2253             Term
2254             Dyad: 𝐩𝐥𝐮𝐬
2255             Term
2256             Variable: 𝗰
2257             Term
2258             Variable: 𝗱
2259             Term
2260             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2261             Term
2262             Variable: 𝗲
2263             Term
2264             Dyad: 𝐬𝐮𝐛
2265             Term
2266             Variable: 𝗳
2267             Term
2268             Suffix: 𝙝
2269             Term
2270             Variable: 𝗴
2271              
2272             Then traverse the parse tree printing the type of each node:
2273              
2274             variable
2275             variable
2276             prefix_d
2277             suffix_d
2278             variable
2279             variable
2280             plus
2281             times
2282             equals
2283             variable
2284             variable
2285             variable
2286             sub
2287             assign
2288             semiColon
2289             brackets_3
2290             prefix_c
2291             suffix_c
2292             brackets_2
2293             prefix_b
2294             suffix_b
2295             brackets_1
2296             prefix_a
2297             suffix_a
2298              
2299             =head1 Description
2300              
2301             Parse a Unisyn expression.
2302              
2303              
2304             Version "20210927".
2305              
2306              
2307             The following sections describe the methods in each functional area of this
2308             module. For an alphabetic listing of all methods by name see L<Index|/Index>.
2309              
2310              
2311              
2312             =head1 Create
2313              
2314             Create a Unisyn parse of a utf8 string.
2315              
2316             =head2 create($address, %options)
2317              
2318             Create a new unisyn parse from a utf8 string.
2319              
2320             Parameter Description
2321             1 $address Address of a zero terminated utf8 source string to parse as a variable
2322             2 %options Parse options.
2323              
2324             B<Example:>
2325              
2326              
2327              
2328             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2329              
2330              
2331             ok Assemble(debug => 0, eq => <<END);
2332             Assign: 𝑎
2333             Term
2334             Variable: 𝗮
2335             Term
2336             Variable: 𝗯
2337             END
2338              
2339              
2340             =head1 Parse
2341              
2342             Parse Unisyn expressions
2343              
2344             =head1 Traverse
2345              
2346             Traverse the parse tree
2347              
2348             =head2 traverseParseTree($parse)
2349              
2350             Traverse the terms in the parse tree in post order and call the operator subroutine associated with each term.
2351              
2352             Parameter Description
2353             1 $parse Parse tree
2354              
2355             B<Example:>
2356              
2357              
2358             my $s = Rutf8 $Lex->{sampleText}{Adv}; # Ascii
2359             my $p = create K(address, $s), operators => \&printOperatorSequence;
2360              
2361             K(address, $s)->printOutZeroString;
2362             # $p->dumpParseTree;
2363             $p->print;
2364              
2365             $p->traverseParseTree; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2366              
2367              
2368             Assemble(debug => 0, eq => <<END);
2369             𝗮𝗮𝑒𝑞𝑢𝑎𝑙𝑠abc 123 𝐩𝐥𝐮𝐬𝘃𝗮𝗿
2370             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2371             Term
2372             Variable: 𝗮𝗮
2373             Term
2374             Dyad: 𝐩𝐥𝐮𝐬
2375             Term
2376             Ascii: abc 123
2377             Term
2378             Variable: 𝘃𝗮𝗿
2379             variable
2380             ascii
2381             variable
2382             plus
2383             equals
2384             END
2385              
2386             my $s = Rutf8 $Lex->{sampleText}{ws};
2387             my $p = create (K(address, $s), operators => \&printOperatorSequence);
2388              
2389             K(address, $s)->printOutZeroString; # Print input string
2390             $p->print; # Print parse
2391              
2392             $p->traverseParseTree; # Traverse tree printing terms # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2393              
2394              
2395             Assemble(debug => 0, eq => <<END);
2396             𝗮𝑎𝑠𝑠𝑖𝑔𝑛⌊〈❨𝗯𝗽❩〉𝐩𝐥𝐮𝐬❪𝘀𝗰❫⌋⟢𝗮𝗮𝑎𝑠𝑠𝑖𝑔𝑛❬𝗯𝗯𝐩𝐥𝐮𝐬𝗰𝗰❭⟢
2397             Semicolon
2398             Term
2399             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2400             Term
2401             Variable: 𝗮
2402             Term
2403             Brackets: ⌊⌋
2404             Term
2405             Term
2406             Dyad: 𝐩𝐥𝐮𝐬
2407             Term
2408             Brackets: ❨❩
2409             Term
2410             Term
2411             Brackets: ❬❭
2412             Term
2413             Term
2414             Variable: 𝗯𝗽
2415             Term
2416             Brackets: ❰❱
2417             Term
2418             Term
2419             Variable: 𝘀𝗰
2420             Term
2421             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2422             Term
2423             Variable: 𝗮𝗮
2424             Term
2425             Brackets: ❴❵
2426             Term
2427             Term
2428             Dyad: 𝐩𝐥𝐮𝐬
2429             Term
2430             Variable: 𝗯𝗯
2431             Term
2432             Variable: 𝗰𝗰
2433             variable
2434             variable
2435             variable
2436             plus
2437             assign
2438             variable
2439             variable
2440             variable
2441             plus
2442             assign
2443             semiColon
2444             END
2445              
2446              
2447             =head1 Print
2448              
2449             Print a parse tree
2450              
2451             =head2 print($parse)
2452              
2453             Print a parse tree.
2454              
2455             Parameter Description
2456             1 $parse Parse tree
2457              
2458             B<Example:>
2459              
2460              
2461              
2462             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2463              
2464              
2465             ok Assemble(debug => 0, eq => <<END);
2466             Assign: 𝑎
2467             Term
2468             Variable: 𝗮
2469             Term
2470             Variable: 𝗯
2471             END
2472              
2473              
2474             =head2 dumpParseTree($parse)
2475              
2476             Dump the parse tree.
2477              
2478             Parameter Description
2479             1 $parse Parse tree
2480              
2481             =head1 Execute
2482              
2483             Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
2484              
2485             =head2 lexToSub($parse, $alphabet, $op, $sub)
2486              
2487             Map a lexical item to a processing subroutine.
2488              
2489             Parameter Description
2490             1 $parse Sub quarks
2491             2 $alphabet The alphabet number
2492             3 $op The operator name in that alphabet
2493             4 $sub Subroutine definition
2494              
2495             =head2 dyad($parse, $text, $sub)
2496              
2497             Define a method for a dyadic operator.
2498              
2499             Parameter Description
2500             1 $parse Sub quarks
2501             2 $text The name of the operator as a utf8 string
2502             3 $sub Associated subroutine definition
2503              
2504             =head2 assign($parse, $text, $sub)
2505              
2506             Define a method for an assign operator.
2507              
2508             Parameter Description
2509             1 $parse Sub quarks
2510             2 $text The name of the operator as a utf8 string
2511             3 $sub Associated subroutine definition
2512              
2513             =head2 prefix($parse, $text, $sub)
2514              
2515             Define a method for a prefix operator.
2516              
2517             Parameter Description
2518             1 $parse Sub quarks
2519             2 $text The name of the operator as a utf8 string
2520             3 $sub Associated subroutine definition
2521              
2522             =head2 suffix($parse, $text, $sub)
2523              
2524             Define a method for a suffix operator.
2525              
2526             Parameter Description
2527             1 $parse Sub quarks
2528             2 $text The name of the operator as a utf8 string
2529             3 $sub Associated subroutine definition
2530              
2531             =head2 ascii($parse, $sub)
2532              
2533             Define a method for ascii text.
2534              
2535             Parameter Description
2536             1 $parse Sub quarks
2537             2 $sub Associated subroutine definition
2538              
2539             =head2 semiColon($parse, $sub)
2540              
2541             Define a method for the semicolon operator.
2542              
2543             Parameter Description
2544             1 $parse Sub quarks
2545             2 $sub Associated subroutine definition
2546              
2547             =head2 variable($parse, $sub)
2548              
2549             Define a method for a variable.
2550              
2551             Parameter Description
2552             1 $parse Sub quarks
2553             2 $sub Associated subroutine definition
2554              
2555             =head2 bracket($parse, $open, $sub)
2556              
2557             Define a method for a bracket operator.
2558              
2559             Parameter Description
2560             1 $parse Sub quarks
2561             2 $open Opening parenthesis
2562             3 $sub Associated subroutine
2563              
2564             =head1 Alphabets
2565              
2566             Translate between alphabets.
2567              
2568             =head2 asciiToAssignLatin($in)
2569              
2570             Translate ascii to the corresponding letters in the assign latin alphabet.
2571              
2572             Parameter Description
2573             1 $in A string of ascii
2574              
2575             =head2 asciiToAssignGreek($in)
2576              
2577             Translate ascii to the corresponding letters in the assign greek alphabet.
2578              
2579             Parameter Description
2580             1 $in A string of ascii
2581              
2582             =head2 asciiToDyadLatin($in)
2583              
2584             Translate ascii to the corresponding letters in the dyad latin alphabet.
2585              
2586             Parameter Description
2587             1 $in A string of ascii
2588              
2589             =head2 asciiToDyadGreek($in)
2590              
2591             Translate ascii to the corresponding letters in the dyad greek alphabet.
2592              
2593             Parameter Description
2594             1 $in A string of ascii
2595              
2596             =head2 asciiToPrefixLatin($in)
2597              
2598             Translate ascii to the corresponding letters in the prefix latin alphabet.
2599              
2600             Parameter Description
2601             1 $in A string of ascii
2602              
2603             =head2 asciiToPrefixGreek($in)
2604              
2605             Translate ascii to the corresponding letters in the prefix greek alphabet.
2606              
2607             Parameter Description
2608             1 $in A string of ascii
2609              
2610             =head2 asciiToSuffixLatin($in)
2611              
2612             Translate ascii to the corresponding letters in the suffix latin alphabet.
2613              
2614             Parameter Description
2615             1 $in A string of ascii
2616              
2617             =head2 asciiToSuffixGreek($in)
2618              
2619             Translate ascii to the corresponding letters in the suffix greek alphabet.
2620              
2621             Parameter Description
2622             1 $in A string of ascii
2623              
2624             =head2 asciiToVariableLatin($in)
2625              
2626             Translate ascii to the corresponding letters in the variable latin alphabet.
2627              
2628             Parameter Description
2629             1 $in A string of ascii
2630              
2631             =head2 asciiToVariableGreek($in)
2632              
2633             Translate ascii to the corresponding letters in the variable greek alphabet.
2634              
2635             Parameter Description
2636             1 $in A string of ascii
2637              
2638             =head2 asciiToEscaped($in)
2639              
2640             Translate ascii to the corresponding letters in the escaped ascii alphabet.
2641              
2642             Parameter Description
2643             1 $in A string of ascii
2644              
2645             =head2 semiColonChar()
2646              
2647             Return the character that represents the semicolon operator.
2648              
2649              
2650             =head2 printOperatorSequence($parse)
2651              
2652             Print the operator calling sequence.
2653              
2654             Parameter Description
2655             1 $parse Parse
2656              
2657              
2658             =head1 Hash Definitions
2659              
2660              
2661              
2662              
2663             =head2 Unisyn::Parse Definition
2664              
2665              
2666             Description of parse
2667              
2668              
2669              
2670              
2671             =head3 Output fields
2672              
2673              
2674             =head4 address8
2675              
2676             Address of source string as utf8
2677              
2678             =head4 arena
2679              
2680             Arena containing tree
2681              
2682             =head4 fails
2683              
2684             Number of failures encountered in this parse
2685              
2686             =head4 operators
2687              
2688             Methods implementing each lexical operator
2689              
2690             =head4 parse
2691              
2692             Offset to the head of the parse tree
2693              
2694             =head4 quarks
2695              
2696             Quarks representing the strings used in this parse
2697              
2698             =head4 size8
2699              
2700             Size of source string as utf8
2701              
2702             =head4 source32
2703              
2704             Source text as utf32
2705              
2706             =head4 sourceLength32
2707              
2708             Length of utf32 string
2709              
2710             =head4 sourceSize32
2711              
2712             Size of utf32 allocation
2713              
2714              
2715              
2716             =head1 Private Methods
2717              
2718             =head2 getAlpha($register, $address, $index)
2719              
2720             Load the position of a lexical item in its alphabet from the current character.
2721              
2722             Parameter Description
2723             1 $register Register to load
2724             2 $address Address of start of string
2725             3 $index Index into string
2726              
2727             =head2 getLexicalCode($register, $address, $index)
2728              
2729             Load the lexical code of the current character in memory into the specified register.
2730              
2731             Parameter Description
2732             1 $register Register to load
2733             2 $address Address of start of string
2734             3 $index Index into string
2735              
2736             =head2 putLexicalCode($register, $address, $index, $code)
2737              
2738             Put the specified lexical code into the current character in memory.
2739              
2740             Parameter Description
2741             1 $register Register used to load code
2742             2 $address Address of string
2743             3 $index Index into string
2744             4 $code Code to put
2745              
2746             =head2 loadCurrentChar()
2747              
2748             Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
2749              
2750              
2751             =head2 checkStackHas($depth)
2752              
2753             Check that we have at least the specified number of elements on the stack.
2754              
2755             Parameter Description
2756             1 $depth Number of elements required on the stack
2757              
2758             =head2 pushElement()
2759              
2760             Push the current element on to the stack.
2761              
2762              
2763             =head2 pushEmpty()
2764              
2765             Push the empty element on to the stack.
2766              
2767              
2768             =head2 lexicalNameFromLetter($l)
2769              
2770             Lexical name for a lexical item described by its letter.
2771              
2772             Parameter Description
2773             1 $l Letter of the lexical item
2774              
2775             =head2 lexicalNumberFromLetter($l)
2776              
2777             Lexical number for a lexical item described by its letter.
2778              
2779             Parameter Description
2780             1 $l Letter of the lexical item
2781              
2782             =head2 lexicalItemLength($source32, $offset)
2783              
2784             Put the length of a lexical item into variable B<size>.
2785              
2786             Parameter Description
2787             1 $source32 B<Address> of utf32 source representation
2788             2 $offset B<Offset> to lexical item in utf32
2789              
2790             =head2 new($depth, $description)
2791              
2792             Create a new term in the parse tree rooted on the stack.
2793              
2794             Parameter Description
2795             1 $depth Stack depth to be converted
2796             2 $description Text reason why we are creating a new term
2797              
2798             =head2 error($message)
2799              
2800             Write an error message and stop.
2801              
2802             Parameter Description
2803             1 $message Error message
2804              
2805             =head2 testSet($set, $register)
2806              
2807             Test a set of items, setting the Zero Flag if one matches, else clearing the Zero Flag.
2808              
2809             Parameter Description
2810             1 $set Set of lexical letters
2811             2 $register Register to test
2812              
2813             =head2 checkSet($set)
2814              
2815             Check that one of a set of items is on the top of the stack or complain if it is not.
2816              
2817             Parameter Description
2818             1 $set Set of lexical letters
2819              
2820             =head2 reduce($priority)
2821              
2822             Convert the longest possible expression on top of the stack into a term at the specified priority.
2823              
2824             Parameter Description
2825             1 $priority Priority of the operators to reduce
2826              
2827             =head2 reduceMultiple($priority)
2828              
2829             Reduce existing operators on the stack.
2830              
2831             Parameter Description
2832             1 $priority Priority of the operators to reduce
2833              
2834             =head2 accept_a()
2835              
2836             Assign.
2837              
2838              
2839             =head2 accept_b()
2840              
2841             Open.
2842              
2843              
2844             =head2 accept_B()
2845              
2846             Closing parenthesis.
2847              
2848              
2849             =head2 accept_d()
2850              
2851             Infix but not assign or semi-colon.
2852              
2853              
2854             =head2 accept_p()
2855              
2856             Prefix.
2857              
2858              
2859             =head2 accept_q()
2860              
2861             Post fix.
2862              
2863              
2864             =head2 accept_s()
2865              
2866             Semi colon.
2867              
2868              
2869             =head2 accept_v()
2870              
2871             Variable.
2872              
2873              
2874             =head2 parseExpression()
2875              
2876             Parse the string of classified lexical items addressed by register $start of length $length. The resulting parse tree (if any) is returned in r15.
2877              
2878              
2879             =head2 MatchBrackets(@parameters)
2880              
2881             Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
2882              
2883             Parameter Description
2884             1 @parameters Parameters
2885              
2886             =head2 ClassifyNewLines(@parameters)
2887              
2888             Scan input string looking for opportunities to convert new lines into semi colons.
2889              
2890             Parameter Description
2891             1 @parameters Parameters
2892              
2893             =head2 ClassifyWhiteSpace(@parameters)
2894              
2895             Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
2896              
2897             Parameter Description
2898             1 @parameters Parameters
2899              
2900             =head2 reload($parse, $parameters)
2901              
2902             Reload the variables associated with a parse
2903              
2904             Parameter Description
2905             1 $parse Parse
2906             2 $parameters Hash of variable parameters
2907              
2908             =head2 parseUtf8($parse, @parameters)
2909              
2910             Parse a unisyn expression encoded as utf8 and return the parse tree.
2911              
2912             Parameter Description
2913             1 $parse Parse
2914             2 @parameters Parameters
2915              
2916             =head2 printLexicalItem($parse, $source32, $offset, $size)
2917              
2918             Print the utf8 string corresponding to a lexical item at a variable offset.
2919              
2920             Parameter Description
2921             1 $parse Parse tree
2922             2 $source32 B<Address> of utf32 source representation
2923             3 $offset B<Offset> to lexical item in utf32
2924             4 $size B<Size> in utf32 chars of item
2925              
2926             =head2 showAlphabet($alphabet)
2927              
2928             Show an alphabet.
2929              
2930             Parameter Description
2931             1 $alphabet Alphabet name
2932              
2933             =head2 T($key, $expected, %options)
2934              
2935             Parse some text and dump the results.
2936              
2937             Parameter Description
2938             1 $key Key of text to be parsed
2939             2 $expected Expected result
2940             3 %options Options
2941              
2942             =head2 C($key, $expected, %options)
2943              
2944             Parse some text and print the results.
2945              
2946             Parameter Description
2947             1 $key Key of text to be parsed
2948             2 $expected Expected result
2949             3 %options Options
2950              
2951              
2952             =head1 Index
2953              
2954              
2955             1 L - Assign.
2956              
2957             2 L - Closing parenthesis.
2958              
2959             3 L - Open.
2960              
2961             4 L - Infix but not assign or semi-colon.
2962              
2963             5 L - Prefix.
2964              
2965             6 L - Post fix.
2966              
2967             7 L - Semi colon.
2968              
2969             8 L - Variable.
2970              
2971             9 L - Define a method for ascii text.
2972              
2973             10 L - Translate ascii to the corresponding letters in the assign greek alphabet.
2974              
2975             11 L - Translate ascii to the corresponding letters in the assign latin alphabet.
2976              
2977             12 L - Translate ascii to the corresponding letters in the dyad greek alphabet.
2978              
2979             13 L - Translate ascii to the corresponding letters in the dyad latin alphabet.
2980              
2981             14 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
2982              
2983             15 L - Translate ascii to the corresponding letters in the prefix greek alphabet.
2984              
2985             16 L - Translate ascii to the corresponding letters in the prefix latin alphabet.
2986              
2987             17 L - Translate ascii to the corresponding letters in the suffix greek alphabet.
2988              
2989             18 L - Translate ascii to the corresponding letters in the suffix latin alphabet.
2990              
2991             19 L - Translate ascii to the corresponding letters in the variable greek alphabet.
2992              
2993             20 L - Translate ascii to the corresponding letters in the variable latin alphabet.
2994              
2995             21 L - Define a method for an assign operator.
2996              
2997             22 L - Define a method for a bracket operator.
2998              
2999             23 L - Parse some text and print the results.
3000              
3001             24 L - Check that one of a set of items is on the top of the stack or complain if it is not.
3002              
3003             25 L - Check that we have at least the specified number of elements on the stack.
3004              
3005             26 L - Scan input string looking for opportunities to convert new lines into semi colons.
3006              
3007             27 L - Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
3008              
3009             28 L - Create a new unisyn parse from a utf8 string.
3010              
3011             29 L - Dump the parse tree.
3012              
3013             30 L - Define a method for a dyadic operator.
3014              
3015             31 L - Write an error message and stop.
3016              
3017             32 L - Load the position of a lexical item in its alphabet from the current character.
3018              
3019             33 L - Load the lexical code of the current character in memory into the specified register.
3020              
3021             34 L - Put the length of a lexical item into variable B<size>.
3022              
3023             35 L - Lexical name for a lexical item described by its letter.
3024              
3025             36 L - Lexical number for a lexical item described by its letter.
3026              
3027             37 L - Map a lexical item to a processing subroutine.
3028              
3029             38 L - Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
3030              
3031             39 L - Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket.
3032              
3033             40 L - Create a new term in the parse tree rooted on the stack.
3034              
3035             41 L - Parse the string of classified lexical items addressed by register $start of length $length.
3036              
3037             42 L - Parse a unisyn expression encoded as utf8 and return the parse tree.
3038              
3039             43 L - Define a method for a prefix operator.
3040              
3041             44 L - Print a parse tree.
3042              
3043             45 L - Print the utf8 string corresponding to a lexical item at a variable offset.
3044              
3045             46 L - Print the operator calling sequence.
3046              
3047             47 L - Push the current element on to the stack.
3048              
3049             48 L - Push the empty element on to the stack.
3050              
3051             49 L - Put the specified lexical code into the current character in memory.
3052              
3053             50 L - Convert the longest possible expression on top of the stack into a term at the specified priority.
3054              
3055             51 L - Reduce existing operators on the stack.
3056              
3057             52 L - Reload the variables associated with a parse
3058              
3059             53 L - Define a method for the semicolon operator.
3060              
3061             54 L - Return the character that represents the semicolon operator.
3062              
3063             55 L - Show an alphabet.
3064              
3065             56 L - Define a method for a suffix operator.
3066              
3067             57 L - Parse some text and dump the results.
3068              
3069             58 L - Test a set of items, setting the Zero Flag if one matches, else clearing the Zero Flag.
3070              
3071             59 L - Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
3072              
3073             60 L - Define a method for a variable.
3074              
3075             =head1 Installation
3076              
3077             This module is written in 100% Pure Perl and, thus, it is easy to read,
3078             comprehend, use, modify and install via B<cpan>:
3079              
3080             sudo cpan install Unisyn::Parse
3081              
3082             =head1 Author
3083              
3084             L
3085              
3086             L
3087              
3088             =head1 Copyright
3089              
3090             Copyright (c) 2016-2021 Philip R Brenan.
3091              
3092             This module is free software. It may be used, redistributed and/or modified
3093             under the same terms as Perl itself.
3094              
3095             =cut
3096              
3097              
3098              
3099             # Tests and documentation
3100              
3101             sub test
3102 1     1 0 6 {my $p = __PACKAGE__;
3103 1         7 binmode($_, ":utf8") for *STDOUT, *STDERR;
3104 1 50       63 return if eval "eof(${p}::DATA)";
3105 1         51 my $s = eval "join('', <${p}::DATA>)";
3106 1 50       20 $@ and die $@;
3107 1 0   1 1 6 eval $s;
  1     1 1 3  
  1     0 1 7  
  1     0   613  
  1     0   52970  
  1     0   8  
  1         111  
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
3108 0 0         $@ and die $@;
3109 0           1
3110             }
3111              
3112             test unless caller;
3113              
3114             1;
3115             # podDocumentation
3116             __DATA__