File Coverage

blib/lib/Unisyn/Parse.pm
Criterion Covered Total %
statement 49 1074 4.5
branch 2 100 2.0
condition 0 9 0.0
subroutine 15 99 15.1
pod 60 62 96.7
total 126 1344 9.3


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/NasmX86/lib/ -I/home/phil/perl/cpan/AsmC/lib/
2             #-------------------------------------------------------------------------------
3             # Parse a Unisyn expression.
4             # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             # Finished in 13.14s, bytes: 2,655,008, execs: 465,858
8             # Can we remove more Pushr by doing one big save in parseutf8 ?
9             package Unisyn::Parse;
10             our $VERSION = "20211008";
11 1     1   2375 use warnings FATAL => qw(all);
  1         7  
  1         34  
12 1     1   4 use strict;
  1         2  
  1         29  
13 1     1   5 use Carp qw(confess cluck);
  1         2  
  1         79  
14 1     1   431 use Data::Dump qw(dump);
  1         6885  
  1         59  
15 1     1   3305 use Data::Table::Text qw(:all !parse);
  1         125876  
  1         1621  
16 1     1   5747 use Nasm::X86 qw(:all);
  1         146055  
  1         2705  
17 1     1   12 use feature qw(say current_sub);
  1         3  
  1         99  
18 1     1   6 use utf8;
  1         2  
  1         9  
19              
20             makeDieConfess;
21              
22             my $develop = -e q(/home/phil/); # Developing
23             our $Parse; # One of the advantages of creating a parse tree is that we can perform parses one at a time, making it safe to globalize this variable. The alternative is to pass this variable between all the parsing calls which would obscure their workings greatly.
24             our $ParseUtf8SubDef; # The definition of the subroutine that does the parsing so that we can reuse its parameters when we call L<new>.
25             our $debug = 0; # Print evolution of stack if true.
26              
27             #D1 Create # Create a Unisyn parse of a utf8 string.
28              
29             sub create($%) # Create a new unisyn parse from a utf8 string.
30 0     0 1 0 {my ($address, %options) = @_; # Address of a zero terminated utf8 source string to parse as a variable, parse options.
31 0 0       0 @_ >= 1 or confess "One or more parameters";
32              
33 0         0 my $a = CreateArena; # Arena to hold parse tree - every parse tree gets its own arena so that we can free parses separately
34 0         0 my $size = StringLength string => $address; # Length of input utf8
35              
36 0         0 my $p = $Parse = genHash(__PACKAGE__, # Description of parse
37             arena => $a, # Arena containing tree
38             size8 => $size, # Size of source string as utf8
39             address8 => $address, # Address of source string as utf8
40             source32 => V(source32), # Source text as utf32
41             sourceSize32 => V(sourceSize32), # Size of utf32 allocation
42             sourceLength32 => V(sourceLength32), # Length of utf32 string
43             parse => V('parse'), # Offset to the head of the parse tree
44             fails => V('fail'), # Number of failures encountered in this parse
45             quarks => $a->CreateQuarks, # Quarks representing the strings used in this parse
46             operators => undef, # Methods implementing each lexical operator
47             );
48              
49 0 0       0 if (my $o = $options{operators}) # Operator methods for lexical items
50 0         0 {$p->operators = $a->CreateQuarks; # Create quark set to translate operator names to offsets
51 0         0 $o->($p);
52             }
53              
54 0         0 $p->parseUtf8; # Parse utf8 source string
55              
56 0         0 $p
57             }
58              
59             #D1 Parse # Parse Unisyn expressions
60              
61             our $Lex = &lexicalData; # Lexical table definitions
62              
63             our $ses = RegisterSize rax; # Size of an element on the stack
64             our ($w1, $w2, $w3) = (r8, r9, r10); # Work registers
65             our $prevChar = r11; # The previous character parsed
66             our $index = r12; # Index of current element
67             our $element = r13; # Contains the item being parsed
68             our $start = r14; # Start of the parse string
69             our $size = r15; # Length of the input string
70             our $parseStackBase = rsi; # The base of the parsing stack in the stack
71             #ur $arenaReg = rax; # The arena in which we are building the parse tree
72             our $indexScale = 4; # The size of a utf32 character
73             our $lexCodeOffset = 3; # The offset in a classified character to the lexical code.
74             our $bitsPerByte = 8; # The number of bits in a byte
75              
76             our $Ascii = $$Lex{lexicals}{Ascii} {number}; # Ascii
77             our $assign = $$Lex{lexicals}{assign} {number}; # Assign
78             our $dyad = $$Lex{lexicals}{dyad} {number}; # Dyad
79             our $CloseBracket = $$Lex{lexicals}{CloseBracket} {number}; # Close bracket
80             our $empty = $$Lex{lexicals}{empty} {number}; # Empty element
81             our $NewLineSemiColon = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semicolon
82             our $OpenBracket = $$Lex{lexicals}{OpenBracket} {number}; # Open bracket
83             our $prefix = $$Lex{lexicals}{prefix} {number}; # Prefix operator
84             our $semiColon = $$Lex{lexicals}{semiColon} {number}; # Semicolon
85             our $suffix = $$Lex{lexicals}{suffix} {number}; # Suffix
86             our $term = $$Lex{lexicals}{term} {number}; # Term
87             our $variable = $$Lex{lexicals}{variable} {number}; # Variable
88             our $WhiteSpace = $$Lex{lexicals}{WhiteSpace} {number}; # White space
89             our $firstSet = $$Lex{structure}{first}; # First symbols allowed
90             our $lastSet = $$Lex{structure}{last}; # Last symbols allowed
91             our $bracketsBase = $$Lex{bracketsBase}; # Base lexical item for brackets
92              
93             our $asciiNewLine = ord("\n"); # New line in ascii
94             our $asciiSpace = ord(' '); # Space in ascii
95              
96             our $lexItemType = 0; # Field number of lexical item type in the description of a lexical item
97             our $lexItemOffset = 1; # Field number of the offset in the utf32 source of the lexical item in the description of a lexical item or - if this a term - the offset of the invariant first block of the sub tree
98             our $lexItemLength = 2; # Field number of the length of the lexical item in the utf32 source in the description of a lexical item
99             our $lexItemQuark = 3; # Quark containing the text of this lexical item.
100             our $lexItemWidth = 4; # The number of fields used to describe a lexical item in the parse tree
101              
102             our $opType = 0; # Operator type field - currently always a term
103             our $opCount = 1; # Number of operands for this operator
104             our $opSub = 2; # Offset of sub associated with this lexical item
105              
106             sub getAlpha($$$) #P Load the position of a lexical item in its alphabet from the current character.
107 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
108 0         0 Mov $register, "[$address+$indexScale*$index]"; # Load lexical code
109             }
110              
111             sub getLexicalCode($$$) #P Load the lexical code of the current character in memory into the specified register.
112 0     0 1 0 {my ($register, $address, $index) = @_; # Register to load, address of start of string, index into string
113 0         0 Mov $register, "[$address+$indexScale*$index+$lexCodeOffset]"; # Load lexical code
114             }
115              
116             sub putLexicalCode($$$$) #P Put the specified lexical code into the current character in memory.
117 0     0 1 0 {my ($register, $address, $index, $code) = @_; # Register used to load code, address of string, index into string, code to put
118 0         0 Mov $register, $code;
119 0         0 Mov "[$address+$indexScale*$index+$lexCodeOffset]", $register; # Save lexical code
120             }
121              
122             sub loadCurrentChar() #P Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
123 0     0 1 0 {my $r = $element."b"; # Classification byte
124              
125 0         0 Mov $element, $index; # Load index of character as upper dword
126 0         0 Shl $element, $indexScale * $bitsPerByte; # Save the index of the character in the upper half of the register so that we know where the character came from.
127 0         0 getLexicalCode $r, $start, $index; # Load lexical classification as lowest byte
128              
129 0         0 Cmp $r, $bracketsBase; # Brackets , due to their frequency, start after 0x10 with open even and close odd
130             IfGe # Brackets
131             Then
132 0     0   0 {And $r, 1 # Bracket: 0 - open, 1 - close
133             },
134             Else
135 0     0   0 {Cmp $r, $Ascii; # Ascii is a type of variable
136             IfEq
137             Then
138 0         0 {Mov $r, $variable;
139             },
140             Else
141 0         0 {Cmp $r, $NewLineSemiColon; # New line semicolon is a type of semi colon
142             IfEq
143             Then
144 0         0 {Mov $r, $semiColon;
145 0         0 };
146 0         0 };
147 0         0 };
148             }
149              
150             sub checkStackHas($) #P Check that we have at least the specified number of elements on the stack.
151 0     0 1 0 {my ($depth) = @_; # Number of elements required on the stack
152 0         0 Mov $w1, $parseStackBase;
153 0         0 Sub $w1, rsp;
154 0         0 Cmp $w1, $ses * $depth;
155             }
156              
157             sub pushElement() #P Push the current element on to the stack.
158 0     0 1 0 {Push $element;
159 0 0       0 if ($debug)
160 0         0 {PrintErrStringNL "Push Element:";
161 0         0 PrintErrRegisterInHex $element;
162             }
163             }
164              
165             sub pushEmpty() #P Push the empty element on to the stack.
166 0     0 1 0 {Mov $w1, $index;
167 0         0 Shl $w1, $indexScale * $bitsPerByte;
168 0         0 Or $w1, $empty;
169 0         0 Push $w1;
170 0 0       0 if ($debug)
171 0         0 {PrintErrStringNL "Push Empty";
172             }
173             }
174              
175             sub lexicalNameFromLetter($) #P Lexical name for a lexical item described by its letter.
176 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
177 0         0 my %l = $Lex->{treeTermLexicals}->%*;
178 0         0 my $n = $l{$l};
179 0 0       0 confess "No such lexical: $l" unless $n;
180             $n->{short}
181 0         0 }
182              
183             sub lexicalNumberFromLetter($) #P Lexical number for a lexical item described by its letter.
184 0     0 1 0 {my ($l) = @_; # Letter of the lexical item
185 0         0 my $n = lexicalNameFromLetter $l;
186 0         0 my $N = $Lex->{lexicals}{$n}{number};
187 0 0       0 confess "No such lexical named: $n" unless defined $N;
188 0         0 $N
189             }
190              
191             sub lexicalItemLength($$) #P Put the length of a lexical item into variable B<size>.
192 0     0 1 0 {my ($source32, $offset) = @_; # B<Address> of utf32 source representation, B<offset> to lexical item in utf32
193              
194             my $s = Subroutine
195 0     0   0 {my ($p, $s) = @_; # Parameters
196             # PushR r14, r15; # We do not need to save the zmm and mask registers because they are only used as temporary work registers and they have been saved in L
197              
198 0         0 $$p{source32}->setReg(r14);
199 0         0 $$p{offset} ->setReg(r15);
200 0         0 Vmovdqu8 zmm0, "[r14+4*r15]"; # Load source to examine
201 0         0 Pextrw r15, xmm0, 1; # Extract lexical type of first element
202              
203             OrBlock # The size of a bracket or a semi colon is always 1
204 0         0 {my ($pass, $end, $start) = @_;
205 0         0 Cmp r15, $OpenBracket;
206 0         0 Je $pass;
207 0         0 Cmp r15, $CloseBracket;
208 0         0 Je $pass;
209 0         0 Cmp r15, $semiColon;
210 0         0 Je $pass;
211              
212 0         0 Vpbroadcastw zmm1, r15w; # Broadcast lexical type
213 0         0 Vpcmpeqw k0, zmm0, zmm1; # Check extent of first lexical item up to 16
214 0         0 Mov r15, 0x55555555; # Set odd positions to one where we know the match will fail
215 0         0 Kmovq k1, r15;
216 0         0 Korq k2, k0, k1; # Fill in odd positions
217              
218 0         0 Kmovq r15, k2;
219 0         0 Not r15; # Swap zeroes and ones
220 0         0 Tzcnt r15, r15; # Trailing zero count is a factor two too big
221 0         0 Shr r15, 1; # Normalized count of number of characters in lexical item
222 0         0 $$p{size}->getReg(r15); # Save size in supplied variable
223             }
224             Pass # Show unitary length
225 0         0 {my ($end, $pass, $start) = @_;
226 0         0 $$p{size}->getConst(1); # Save size in supplied variable
227 0         0 };
228              
229             # PopR;
230 0         0 } [qw(offset source32 size)],
231             name => q(Unisyn::Parse::lexicalItemLength);
232              
233 0         0 $s->call(offset => $offset, source32 => $source32, my $size = V(size));
234              
235 0         0 $size
236             }
237              
238             sub new($$) #P Create a new term in the parse tree rooted on the stack.
239 0     0 1 0 {my ($depth, $description) = @_; # Stack depth to be converted, text reason why we are creating a new term
240              
241 0         0 my $wr = RegisterSize rax; # Width of general purpose register
242              
243             my $s = Subroutine
244 0     0   0 {my ($locals) = @_; # Parameters
245 0         0 my $a = DescribeArena $$locals{bs}; # Address arena
246              
247             my $quarks = $Parse->quarks->reload(arena => $$locals{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
248             array => $$locals{numbersToStringsFirst},
249 0         0 tree => $$locals{stringsToNumbersFirst});
250              
251             my $operators = $Parse->operators ? $Parse->operators->reload # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
252             (arena => $$locals{bs},
253             array => $$locals{opNumbersToStringsFirst},
254 0 0       0 tree => $$locals{opStringsToNumbersFirst}) : undef;
255              
256 0         0 my $t = $a->CreateTree; # Create a tree in the arena to hold the details of the lexical elements on the stack
257 0         0 my $o = V(offset); # Offset into source for lexical item
258 0         0 $t->insert(V(key, $opType), K(data, $term)); # Create a term - we only have terms at the moment in the parse tree - but that might change in the future
259 0         0 $t->insert(V(key, $opCount), K(data, $depth)); # The number of elements in the term which is the number of operands for the operator
260              
261 0         0 my $liOnStack = $w1; # The lexical item as it appears on the stack
262 0         0 my $liType = $w2; # The lexical item type
263 0         0 my $liOffset = $w3; # The lexical item offset in the source
264              
265 0         0 PushR zmm0; # Put the simulated stack on the stack
266              
267 0         0 for my $i(1..$depth) # Each term
268 0         0 {my $j = $depth + 1 - $i;
269 0         0 my $k = ($i - 1) * $wr; # Position in simulated stack
270 0         0 Mov $liOnStack, "[rsp+$k]"; # Copy term out of simulated stack
271 0 0       0 PrintErrRegisterInHex $liOnStack if $debug;
272              
273 0         0 Mov $liOffset, $liOnStack; # Offset of either the text in the source or the offset of the first block of the tree describing a term
274 0         0 Shr $liOffset, 32; # Offset in source: either the actual text or the offset of the first block of the tree containing a term shifted over to look as if it were an offset in the source
275 0         0 $o->getReg($liOffset); # Offset of lexical item in source or offset of first block in tree describing a term
276              
277 0         0 ClearRegisters $liType;
278 0         0 Mov $liType."b", $liOnStack."b"; # The lexical item type in the lowest byte, the rest clear.
279              
280 0         0 Cmp $liType, $term; # Check whether the lexical item on the stack is a term
281             IfEq # Insert a sub tree if we are inserting a term
282             Then
283 0         0 {$t->insertTree(K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset of first block in the tree representing the term
284             },
285             Else # Insert the offset in the utf32 source if we are not on a term
286 0         0 {$t->insert (K(key, $lexItemWidth * $j + $lexItemOffset), $o); # Offset in source of non term
287 0         0 };
288              
289 0         0 Cmp $liType, $variable; # Check whether the lexical item is a variable which can also represent ascii
290             IfEq # A variable can also represent ascii so reload the precise lexical type from the source
291             Then
292 0         0 {Mov $liType."b", "[$start+4*$liOffset+3]"; # Load lexical type from source
293 0         0 };
294              
295 0         0 Cmp $liType, $term; # Length of lexical item that is not a term
296             IfNe
297             Then # Not a term
298 0         0 {my $size = lexicalItemLength(V(address, $start), $o); # Get the size of the lexical item at the offset indicated on the stack
299 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemLength), $size); # Save size of lexical item in parse tree
300              
301 0         0 my $s = CreateShortString(1); # Short string to hold text of lexical item so we can load it into a quark
302 0         0 $s->clear; # Perhaps not strictly necessary but easier to debug
303 0         0 PushR r15; # Probably not needed as saved in L
304 0 0 0     0 r15 ne $start && r15 ne $liOffset or confess "r15 in use";
305 0         0 Lea r15, "[$start+4*$liOffset]"; # Start address of lexical item
306 0         0 my $startAddress = V(address, r15); # Save start address of lexical item
307 0         0 PopR;
308              
309 0         0 Cmp $liType, $OpenBracket; # Is it a bracket ?
310             IfEq
311             Then
312 0         0 {ClearRegisters $liType; # Compute lexical type of bracket by adding bracket number to the start of the bracket range
313 0         0 Mov $liType."b", "[$start+4*$liOffset+3]"; # Load bracket number
314 0         0 Shl $liType, 16; # Shift bracket base into position
315 0         0 Add $liType, 2; # Set length of short string as two = (lexical type, bracket number)
316 0         0 Pinsrd "xmm1", $liType."d", 0; # Load short string
317 0         0 Shr $liType, 16; # Move lexical type back into position for insertion into the parse tree
318             },
319             Else
320 0         0 {$s->loadDwordBytes(0, $startAddress, $size, 1); # Load text of lexical item into short string leaving space for lexical type
321 0         0 Pinsrb "xmm1", $liType."b", 1; # Set lexical type as the first byte of the short string
322 0         0 };
323              
324 0         0 my $q = $quarks->quarkFromShortString($s); # Find the quark matching the lexical item if there is such a quark
325 0         0 $t->insert(V(key, $lexItemWidth * $j + $lexItemQuark), $q); # Save quark number of lexical item in parse tree
326 0 0       0 if ($operators) # The parse has operator definitions
327 0 0       0 {if ($j == 1) # The operator quark is always first
328             {OrBlock # Like an operator or like a variable?
329 0         0 {my ($pass, $end, $start) = @_;
330 0         0 Cmp $liType, $variable;
331 0         0 Je $pass; # Process a variable
332 0         0 Cmp $liType, $Ascii;
333 0         0 Je $pass; # Process ascii constant
334 0         0 Cmp $liType, $semiColon;
335 0         0 Je $pass; # Process Semicolon
336 0         0 Cmp $liType, $NewLineSemiColon;
337 0         0 Je $pass; # Process new line semicolon
338             # Process non variable, i.e. operators specifically
339 0         0 my $N = $operators->subFromQuarkViaQuarks($quarks, $q); # Look up the subroutine associated with this operator
340             If $N >= 0, # Found a matching operator subroutine
341             Then
342 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
343 0         0 };
344             }
345             Pass # Process variables in general or items based on variables using a short string of length 1 being the lexical type of the item in question
346 0         0 {Shl $liType, 8; # Move lexical type into second byte
347 0         0 Inc $liType; # Show length
348 0         0 Pinsrq "xmm1", $liType, 0; # Load short string
349 0         0 my $N = $operators->subFromShortString($s); # Address of sub to process variable or ascii or semicolon
350 0         0 Shr $liType, 8; # Restore lexical type
351             If $N >= 0, # Found a matching operator subroutine
352             Then
353 0         0 {$t->insert(V(key, $opSub), $N); # Save offset to subroutine associated with this lexical item
354 0         0 };
355 0         0 };
356             }
357             }
358 0         0 };
359              
360 0         0 $t->insert (V(key, $lexItemWidth * $j + $lexItemType), # Save lexical type in parse tree
361             V(data)->getReg($liType));
362             }
363             # Push new term onto the stack in place of the items popped off
364 0         0 $t->first->setReg($liOffset); # Offset of new term tree
365 0         0 Shl $liOffset, 32; # Push offset to term tree into the upper dword to make it look like a source offset
366 0         0 Or $liOffset."b", $term; # Mark as a term tree
367 0         0 $$locals{new}->getReg($liOffset); # New term comprised of a tree of old terms
368 0         0 PopR; # Restore stack to its position at the start
369             }
370 0         0 [qw(new)], with => $ParseUtf8SubDef,
371             # [qw(bs new
372             # numbersToStringsFirst stringsToNumbersFirst
373             # opNumbersToStringsFirst opStringsToNumbersFirst
374             # )],
375             name=>"Unisyn::Parse::new_$depth";
376              
377 0 0       0 PrintErrStringNL "New: $description" if $debug;
378              
379 0 0       0 if ($depth == 1) {Mov $w1, 1} # Copy the top of the real stack which holds the parse state to zmm0 so that we can adjust the stack to call L<new>
  0 0       0  
380 0         0 elsif ($depth == 2) {Mov $w1, 3}
381 0         0 else {Mov $w1, 7}
382 0         0 Kmovq k1, $w1; # B is saved in L
383 0         0 Vmovdqu64 "zmm0{k1}", "[rsp]"; # Copy top lexical items on stack
384              
385             # $s->call(bs => $Parse->arena->bs, my $new = V('new'),
386             # numbersToStringsFirst => $Parse->quarks->numbersToStrings->first,
387             # stringsToNumbersFirst => $Parse->quarks->stringsToNumbers->first,
388             # opNumbersToStringsFirst => $Parse->operators ? $Parse->operators->subQuarks->numbersToStrings->first : 0,
389             # opStringsToNumbersFirst => $Parse->operators ? $Parse->operators->subQuarks->stringsToNumbers->first : 0,
390             # );
391              
392 0         0 $s->call(my $new = V('new'));
393              
394 0         0 $new->setReg($w1); # Save offset of new term in a work register
395 0         0 Add rsp, $depth * $wr; # Remove input terms from stack
396 0         0 Push $w1; # Save new term on stack
397             }
398              
399             sub error($) #P Write an error message and stop.
400 0     0 1 0 {my ($message) = @_; # Error message
401 0         0 PrintOutStringNL "Error: $message";
402 0         0 PrintOutString "Element: ";
403 0         0 PrintOutRegisterInHex $element;
404 0         0 PrintOutString "Index : ";
405 0         0 PrintOutRegisterInHex $index;
406 0         0 Exit(0);
407             }
408              
409             sub testSet($$) #P Test a set of items, setting the Zero Flag if one matches, else clearing the Zero Flag.
410 0     0 1 0 {my ($set, $register) = @_; # Set of lexical letters, Register to test
411 0         0 my @n = map {sprintf("0x%x", lexicalNumberFromLetter $_)} split //, $set; # Each lexical item by number from letter
  0         0  
412 0         0 my $end = Label;
413 0         0 for my $n(@n)
414 0         0 {Cmp $register."b", $n;
415 0         0 Je $end
416             }
417 0         0 ClearZF;
418 0         0 SetLabel $end;
419             }
420              
421             sub checkSet($) #P Check that one of a set of items is on the top of the stack or complain if it is not.
422 0     0 1 0 {my ($set) = @_; # Set of lexical letters
423 0         0 my @n = map {lexicalNumberFromLetter $_} split //, $set;
  0         0  
424 0         0 my $end = Label;
425              
426 0         0 for my $n(@n)
427 0         0 {Cmp "byte[rsp]", $n;
428 0         0 Je $end
429             }
430 0         0 error("Expected one of: '$set' on the stack");
431 0         0 ClearZF;
432 0         0 SetLabel $end;
433             }
434              
435             sub reduce($) #P Convert the longest possible expression on top of the stack into a term at the specified priority.
436 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
437 0         0 $priority =~ m(\A(1|3)\Z); # Level: 1 - all operators, any other value - dyads only. NOTE: the result of this match is discarded so the priority is not actually validated here
438 0         0 my ($success, $end) = map {Label} 1..2; # Exit points
  0         0  
439              
440 0         0 checkStackHas 3; # At least three elements on the stack
441             IfGe
442             Then
443 0     0   0 {my ($l, $d, $r) = ($w1, $w2, $w3);
444 0         0 Mov $l, "[rsp+".(2*$ses)."]"; # Top 3 elements on the stack
445 0         0 Mov $d, "[rsp+".(1*$ses)."]";
446 0         0 Mov $r, "[rsp+".(0*$ses)."]";
447              
448 0 0       0 if ($debug)
449 0         0 {PrintErrStringNL "Reduce 3:";
450 0         0 PrintErrRegisterInHex $l, $d, $r;
451             }
452              
453 0         0 testSet("t", $l); # Parse out infix operator expression
454             IfEq
455             Then
456 0         0 {testSet("t", $r);
457             IfEq
458             Then
459 0 0       0 {testSet($priority == 1 ? "ads" : 'd', $d); # Reduce all operators or just reduce infix priority 3 operators
460             IfEq
461             Then
462 0         0 {Add rsp, 3 * $ses; # Reorder into polish notation
463 0         0 Push $_ for $d, $l, $r;
464 0         0 new(3, "Term infix term");
465 0         0 Jmp $success;
466 0         0 };
467 0         0 };
468 0         0 };
469              
470 0         0 testSet("b", $l); # Parse parenthesized term
471             IfEq
472             Then
473 0         0 {testSet("B", $r);
474             IfEq
475             Then
476 0         0 {testSet("t", $d);
477             IfEq
478             Then
479 0         0 {Add rsp, $ses;
480 0         0 new(1, "Bracketed term");
481 0         0 new(2, "Brackets for term");
482 0 0       0 PrintErrStringNL "Reduce by ( term )" if $debug;
483 0         0 Jmp $success;
484 0         0 };
485 0         0 };
486 0         0 };
487 0         0 };
488              
489 0         0 checkStackHas 2; # At least two elements on the stack
490             IfGe # Convert an empty pair of parentheses to an empty term
491             Then
492 0     0   0 {my ($l, $r) = ($w1, $w2);
493              
494 0 0       0 if ($debug)
495 0         0 {PrintErrStringNL "Reduce 2:";
496 0         0 PrintErrRegisterInHex $l, $r;
497             }
498              
499             # KeepFree $l, $r; # Why ?
500 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # Top 2 elements on the stack
501 0         0 Mov $r, "[rsp+".(0*$ses)."]";
502 0         0 testSet("b", $l); # Empty pair of parentheses
503             IfEq
504             Then
505 0         0 {testSet("B", $r);
506             IfEq
507             Then
508 0         0 {Add rsp, 2 * $ses; # Pop expression
509 0         0 Push $l; # Bracket as operator
510 0         0 new(1, "Empty brackets");
511 0         0 Jmp $success;
512 0         0 };
513 0         0 };
514 0         0 testSet("s", $l); # Semi-colon, close implies remove unneeded semi
515             IfEq
516             Then
517 0         0 {testSet("B", $r);
518             IfEq
519             Then
520 0         0 {Add rsp, 2 * $ses; # Pop expression
521 0         0 Push $r;
522 0 0       0 PrintErrStringNL "Reduce by ;)" if $debug;
523 0         0 Jmp $success;
524 0         0 };
525 0         0 };
526 0         0 testSet("p", $l); # Prefix, term
527             IfEq
528             Then
529 0         0 {testSet("t", $r);
530             IfEq
531             Then
532 0         0 {new(2, "Prefix term");
533 0         0 Jmp $success;
534 0         0 };
535 0         0 };
536             # KeepFree $l, $r;
537 0         0 };
538              
539 0         0 ClearZF; # Failed to match anything
540 0         0 Jmp $end;
541              
542 0         0 SetLabel $success; # Successfully matched
543 0         0 SetZF;
544              
545 0         0 SetLabel $end; # End
546             } # reduce
547              
548             sub reduceMultiple($) #P Reduce existing operators on the stack.
549 0     0 1 0 {my ($priority) = @_; # Priority of the operators to reduce
550             K('count',99)->for(sub # An improbably high but finite number of reductions
551 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
552 0         0 reduce($priority);
553 0         0 Jne $end; # Keep going as long as reductions are possible
554 0         0 });
555             }
556              
557             sub accept_a() #P Assign.
558 0     0 1 0 {checkSet("t");
559 0         0 reduceMultiple 2;
560 0 0       0 PrintErrStringNL "accept a" if $debug;
561 0         0 pushElement;
562             }
563              
564             sub accept_b #P Open.
565 0     0 1 0 {checkSet("abdps");
566 0 0       0 PrintErrStringNL "accept b" if $debug;
567 0         0 pushElement;
568             }
569              
570             sub accept_B #P Closing parenthesis.
571 0     0 1 0 {checkSet("bst");
572 0 0       0 PrintErrStringNL "accept B" if $debug;
573 0         0 reduceMultiple 1;
574 0         0 pushElement;
575 0         0 reduceMultiple 1;
576 0         0 checkSet("bst");
577             }
578              
579             sub accept_d #P Infix but not assign or semi-colon.
580 0     0 1 0 {checkSet("t");
581 0 0       0 PrintErrStringNL "accept d" if $debug;
582 0         0 pushElement;
583             }
584              
585             sub accept_p #P Prefix.
586 0     0 1 0 {checkSet("abdps");
587 0 0       0 PrintErrStringNL "accept p" if $debug;
588 0         0 pushElement;
589             }
590              
591             sub accept_q #P Post fix.
592 0     0 1 0 {checkSet("t"); # Check the set "t" before accepting the post fix operator
593 0 0       0 PrintErrStringNL "accept q" if $debug; # NOTE(review): when $debug is set this print is emitted between checkSet and the IfEq below - confirm the condition tested by IfEq survives it
594             IfEq # Post fix operator applied to a term
595             Then
596 0     0   0 {Pop $w1; # Lift the top of the stack into the work register
597 0         0 pushElement; # Push the post fix operator underneath it
598 0         0 Push $w1; # Put the lifted entry back on top
599 0         0 new(2, "Postfix"); # Presumably builds a new term from the top two stack entries - confirm against new
600             }
601 0         0 }
602              
603             sub accept_s #P Semi colon.
604 0     0 1 0 {checkSet("bst"); # Check the set "bst" before accepting the semi colon
605 0 0       0 PrintErrStringNL "accept s" if $debug;
606 0         0 Mov $w1, "[rsp]"; # Peek at the top of the parse stack without popping it
607 0         0 testSet("s", $w1); # Is the top of the stack in the set "s"
608             IfEq # Insert an empty element between two consecutive semicolons
609             Then
610 0     0   0 {pushEmpty;
611 0         0 };
612 0         0 reduceMultiple 1; # Reduce at priority 1 before the semi colon is pushed
613 0         0 pushElement;
614             }
615              
616             sub accept_v #P Variable.
617 0     0 1 0 {checkSet("abdps"); # Check the set "abdps" before accepting the variable
618 0 0       0 PrintErrStringNL "accept v" if $debug;
619 0         0 pushElement;
620 0         0 new(1, "Variable"); # Presumably wraps the single element just pushed as a new term - confirm against new
621             V(count,99)->for(sub # Reduce prefix operators: the loop is bounded at 99 iterations
622 0     0   0 {my ($index, $start, $next, $end) = @_; # Only the end label is used here
623 0         0 checkStackHas 2;
624 0         0 Jl $end; # Fewer than two elements on the stack so there is nothing to combine
625 0         0 my ($l, $r) = ($w1, $w2); # Left and right hand work registers
626 0         0 Mov $l, "[rsp+".(1*$ses)."]"; # The element one below the top of the stack
627 0         0 Mov $r, "[rsp+".(0*$ses)."]"; # The element on top of the stack - loaded but not otherwise referenced in this sub
628 0         0 testSet("p", $l); # Is the lower element in the set "p"
629 0         0 Jne $end; # Not in the set so stop reducing
630 0         0 new(2, "Prefixed variable"); # Presumably combines the top two stack entries into one term - confirm against new
631 0         0 });
632             }
633              
634             sub parseExpression() #P Parse the string of classified lexical items addressed by register $start of length $length. The resulting parse tree (if any) is returned in r15.
635 0     0 1 0 {my $end = Label; # Label at the very end of the generated code, after r15 has been loaded
636 0         0 my $eb = $element."b"; # Contains a byte from the item being parsed
637              
638 0         0 Cmp $size, 0; # Check for empty expression
639 0         0 Je $end; # Nothing to parse: jump past the code that loads r15
640              
641 0         0 loadCurrentChar; # Load current character
642             ### Need test for ignorable white space as first character
643 0         0 testSet($firstSet, $element); # Is the first item in the set of items allowed to start an expression
644             IfNe
645             Then
646 0     0   0 {error(<
647             Expression must start with 'opening parenthesis', 'prefix
648             operator', 'semi-colon' or 'variable'.
649             END
650 0         0 };
651              
652 0         0 testSet("v", $element); # Single variable
653             IfEq
654             Then
655 0     0   0 {pushElement;
656 0         0 new(1, "accept initial variable");
657             },
658             Else
659 0     0   0 {testSet("s", $element); # Semi
660             IfEq
661             Then
662 0         0 {pushEmpty; # An initial semi colon is preceded by an empty element
663 0         0 new(1, "accept initial semicolon");
664 0         0 };
665 0         0 pushElement; # Push the first item whether or not it was a semi colon
666 0         0 };
667              
668 0         0 Inc $index; # We have processed the first character above
669 0         0 Mov $prevChar, $element; # Initialize the previous lexical item
670              
671             For # Parse each utf32 character after it has been classified
672 0     0   0 {my ($start, $end, $next) = @_; # Start and end of the classification loop - this $end is the loop's own label and hides the outer $end inside the loop body
673 0         0 loadCurrentChar; # Load current character
674              
675 0 0       0 PrintErrRegisterInHex $element if $debug;
676              
677 0         0 Cmp $eb, $WhiteSpace;
678 0         0 Je $next; # Ignore white space
679              
680 0         0 Cmp $eb, 1; # Brackets are singular but everything else can potentially be a plurality
681             IfGt
682             Then
683 0         0 {Cmp $prevChar."b", $eb; # Compare with previous element known not to be white space or a bracket
684 0         0 Je $next # Same lexical type as the previous character so still inside the same lexical item
685 0         0 };
686 0         0 Mov $prevChar, $element; # Save element to previous element now we know we are on a different element
687              
688 0         0 for my $l(sort keys $Lex->{lexicals}->%*) # Each possible lexical item after classification - this Perl loop runs at generation time and emits one compare per lexical item
689 0         0 {my $x = $Lex->{lexicals}{$l}{letter};
690 0 0       0 next unless $x; # Skip characters that do not have a letter defined for Tree::Term because the lexical items needed to layout a file of lexical items are folded down to the actual lexical items required to represent the language independent of the textual layout with white space.
691              
692 0         0 my $n = $Lex->{lexicals}{$l}{number};
693 0         0 Comment "Compare to $n for $l";
694 0         0 Cmp $eb, $n;
695              
696             IfEq
697             Then
698 0         0 {eval "accept_$x"; # Call the acceptor named after the letter of this lexical item. NOTE(review): the result of the string eval is not checked here so a failure inside it would pass unnoticed - confirm
699 0         0 Jmp $next
700 0         0 };
701             }
702 0         0 error("Unexpected lexical item"); # Not selected
703 0         0 } $index, $size;
704              
705 0         0 testSet($lastSet, $prevChar); # Last lexical element
706             IfNe # Incomplete expression
707             Then
708 0     0   0 {error("Incomplete expression");
709 0         0 };
710              
711             K('count', 99)->for(sub # Remove trailing semicolons if present
712 0     0   0 {my ($index, $start, $next, $end) = @_; # Execute body
713 0         0 checkStackHas 2;
714 0         0 Jl $end; # Does not have two or more elements
715 0         0 Pop $w1;
716 0         0 testSet("s", $w1); # Check that the top most element is a semi colon
717             IfNe # Not a semi colon so put it back and finish the loop
718             Then
719 0         0 {Push $w1;
720 0         0 Jmp $end;
721 0         0 };
722 0         0 });
723              
724 0         0 reduceMultiple 1; # Final reductions
725              
726 0         0 checkStackHas 1;
727             IfNe # Presumably the stack does not hold exactly one element - confirm against checkStackHas
728             Then
729 0     0   0 {error("Multiple expressions on stack");
730 0         0 };
731              
732 0         0 Pop r15; # The resulting parse tree
733 0         0 Shr r15, 32; # The offset of the resulting parse tree
734 0         0 SetLabel $end;
735             } # parseExpression
736              
737             sub MatchBrackets(@) #P Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
738 0     0 1 0 {my (@parameters) = @_; # Parameters
739 0 0       0 @_ >= 1 or confess "One or more parameters";
740              
741             my $s = Subroutine
742 0     0   0 {my ($p) = @_; # Parameters
743 0         0 Comment "Match brackets in utf32 text";
744              
745 0         0 my $finish = Label;
746 0         0 PushR xmm0, k7, r10, r11, r12, r13, r14, r15, rsi; # R15 current character address. r14 is the current classification. r13 the last classification code. r12 the stack depth. r11 the number of opening brackets found. r10 address of first utf32 character.
747              
748 0         0 Mov rsi, rsp; # Save stack location so we can use the stack to record the brackets we have found
749 0         0 ClearRegisters r11, r12, r15; # Count the number of brackets and track the stack depth, index of each character
750 0         0 K(three, 3)->setMaskFirst(k7); # These are the number of bytes that we are going to use for the offsets of brackets which limits the size of a program to 2**24 (about 16.7 million) utf32 characters
751 0         0 $$p{fail} ->getReg(r11); # Clear failure indicator
752 0         0 $$p{opens} ->getReg(r11); # Clear count of opens
753 0         0 $$p{address}->setReg(r10); # Address of first utf32 character
754 0         0 my $w = RegisterSize eax; # Size of a utf32 character
755              
756             $$p{size}->for(sub # Process each utf32 character in the block of memory
757 0         0 {my ($index, $start, $next, $end) = @_;
758 0         0 my $continue = Label;
759              
760 0         0 Mov r14b, "[r10+$w*r15+3]"; # Classification character. NOTE(review): only the low byte of r14 is loaded while the comparisons below use all of r14 and r14 is not among the registers cleared above - confirm the upper bytes are zero on entry
761              
762 0         0 Cmp r14, 0x10; # First bracket
763 0         0 Jl $continue; # Less than first bracket
764 0         0 Cmp r14, 0x4f; # Last bracket
765 0         0 Jg $continue; # Greater than last bracket
766              
767 0         0 Test r14, 1; # Zero means that the bracket is an opener
768             IfZ sub # Save an opener then continue
769 0         0 {Push r15; # Save position in input
770 0         0 Push r14; # Save opening code
771 0         0 Inc r11; # Count number of opening brackets
772 0         0 Inc r12; # Number of brackets currently open
773 0         0 Jmp $continue;
774 0         0 };
775 0         0 Cmp r12, 1; # Check that there is a bracket to match on the stack
776             IfLt sub # Nothing on stack
777 0         0 {Not r15; # Minus the offset at which the error occurred so that we can fail at zero
778 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
779 0         0 Jmp $finish; # Return
780 0         0 };
781 0         0 Mov r13, "[rsp]"; # Peek at the opening bracket code which is on top of the stack
782 0         0 Inc r13; # Expected closing bracket
783 0         0 Cmp r13, r14; # Check for match
784             IfNe sub # Mismatch
785 0         0 {Not r15; # Minus the offset at which the error occurred so that we can fail at zero
786 0         0 $$p{fail}->getReg(r15); # Position in input that caused the failure
787 0         0 Jmp $finish; # Return
788 0         0 };
789 0         0 Pop r13; # The closing bracket matches the opening bracket
790 0         0 Pop r13; # Offset of opener
791 0         0 Dec r12; # Close off bracket sequence
792 0         0 Vpbroadcastq xmm0, r15; # Load offset of closer - r15 is the index of the current character which is the closing bracket
793 0         0 Vmovdqu8 "[r10+$w*r13]\{k7}", xmm0; # Save offset of closer in the code for the opener - the classification is left intact so we still know what kind of bracket we have
794 0         0 Vpbroadcastq xmm0, r13; # Load offset of opener
795 0         0 Vmovdqu8 "[r10+$w*r15]\{k7}", xmm0; # Save offset of opener in the code for the closer - the classification is left intact so we still know what kind of bracket we have
796 0         0 SetLabel $continue; # Continue with next character
797 0         0 Inc r15; # Next character
798 0         0 });
799              
800 0         0 SetLabel $finish;
801 0         0 Mov rsp, rsi; # Restore stack
802 0         0 $$p{opens}->getReg(r11); # Number of brackets opened
803 0         0 PopR;
804 0         0 } [qw(address size fail opens)], name => q(Unisyn::Parse::MatchBrackets);
805              
806 0         0 $s->call(@parameters);
807             } # MatchBrackets
808              
809             sub ClassifyNewLines(@) #P Scan input string looking for opportunities to convert new lines into semi colons.
810 0     0 1 0 {my (@parameters) = @_; # Parameters
811 0 0       0 @_ >= 1 or confess "One or more parameters";
812              
813             my $s = Subroutine
814 0     0   0 {my ($p) = @_; # Parameters
815 0         0 my $current = r15; # Index of the current character
816 0         0 my $middle = r14; # Index of the middle character
817 0         0 my $first = r13; # Index of the first character
818 0         0 my $address = r12; # Address of input string
819 0         0 my $size = r11; # Length of input utf32 string
820 0         0 my($c1, $c2) = (r8."b", r9."b"); # Lexical codes being tested
821              
822 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
823              
824 0         0 $$p{address}->setReg($address); # Address of string
825 0         0 $$p{size} ->setReg($size); # Size of string
826 0         0 Mov $current, 2; Mov $middle, 1; Mov $first, 0; # Start with a window over the first three characters
  0         0  
  0         0  
827              
828             For # Each character in input string
829 0         0 {my ($start, $end, $next) = @_; # Start, end and next labels
830              
831              
832 0         0 getLexicalCode $c1, $address, $middle; # Lexical code of the middle character
833 0         0 Cmp $c1, $WhiteSpace;
834             IfEq
835             Then
836 0         0 {getAlpha $c1, $address, $middle;
837              
838 0         0 Cmp $c1, $asciiNewLine;
839             IfEq # Middle character is a insignificant new line and thus could be a semicolon
840             Then
841 0         0 {getLexicalCode $c1, $address, $first;
842              
843             my sub makeSemiColon # Make a new line into a new line semicolon
844 0         0 {putLexicalCode $c2, $address, $middle, $NewLineSemiColon;
845             }
846              
847             my sub check_bpv # Make new line if followed by 'b', 'p' or 'v'
848 0         0 {getLexicalCode $c1, $address, $current;
849 0         0 Cmp $c1, $OpenBracket;
850              
851             IfEq
852             Then
853 0         0 {makeSemiColon;
854             },
855             Else
856 0         0 {Cmp $c1, $prefix;
857             IfEq
858             Then
859 0         0 {makeSemiColon;
860             },
861             Else
862 0         0 {Cmp $c1, $variable;
863             IfEq
864             Then
865 0         0 {makeSemiColon;
866 0         0 };
867 0         0 };
868 0         0 };
869             }
870              
871 0         0 Cmp $c1, $CloseBracket; # Check first character of sequence
872             IfEq
873             Then
874 0         0 {check_bpv; # Each call emits its own copy of the checks on the character that follows the new line
875             },
876             Else
877 0         0 {Cmp $c1, $suffix;
878             IfEq
879             Then
880 0         0 {check_bpv;
881             },
882             Else
883 0         0 {Cmp $c1, $variable;
884             IfEq
885             Then
886 0         0 {check_bpv;
887 0         0 };
888 0         0 };
889 0         0 };
890 0         0 };
891 0         0 };
892              
893 0         0 Mov $first, $middle; Mov $middle, $current; # Find next lexical item
  0         0  
894 0         0 getLexicalCode $c1, $address, $current; # Current lexical code
895 0         0 Mov $middle, $current; # NOTE(review): repeats the move made two statements earlier with no change to either register in between - looks redundant, confirm
896 0         0 Inc $current; # Next possible character
897             For
898 0         0 {my ($start, $end, $next) = @_;
899 0         0 getLexicalCode $c2, $address, $current; # Lexical code of next character
900 0         0 Cmp $c1, $c2;
901 0         0 Jne $end; # Terminate when we are in a different lexical item
902 0         0 } $current, $size;
903 0         0 } $current, $size;
904              
905 0         0 PopR;
906 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyNewLines);
907              
908 0         0 $s->call(@parameters);
909             } # ClassifyNewLines
910              
911             sub ClassifyWhiteSpace(@) #P Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
912 0     0 1 0 {my (@parameters) = @_; # Parameters
913 0 0       0 @_ >= 1 or confess "One or more parameters";
914              
915             my $s = Subroutine
916 0         0 {my ($p) = @_; # Parameters
917 0         0 my $eb = r15."b"; # Lexical type of current char
918 0         0 my $s = r14; # State of white space between 'a'
919 0         0 my $S = r13; # State of white space before 'a'
920 0         0 my $cb = r12."b"; # Actual character within alphabet
921 0         0 my $address = r11; # Address of input string
922 0         0 my $index = r10; # Index of current char
923 0         0 my ($w1, $w2) = (r8."b", r9."b"); # Temporary work registers
924              
925             my sub getAlpha($;$) # Load the position of a lexical item in its alphabet from the current character
926 0         0 {my ($register, $indexReg) = @_; # Register to load, optional index register
927 0   0     0 getAlpha $register, $address, $index // $indexReg # NOTE(review): $index is tested first and is assigned r10 above so the optional $indexReg looks like it is never used - compare putLexicalCode below which tests $indexReg first, confirm which was intended
928             };
929              
930             my sub getLexicalCode() # Load the lexical code of the current character in memory into the current character
931 0         0 {getLexicalCode $eb, $address, $index; # Always the current character: this wrapper takes no parameters
932             };
933              
934             my sub putLexicalCode($;$) # Put the specified lexical code into the current character in memory.
935 0         0 {my ($code, $indexReg) = @_; # Code, optional index register
936 0   0     0 putLexicalCode $w1, $address, ($indexReg//$index), $code; # Supplied index register or default to the current character
937             };
938              
939 0         0 PushR r8, r9, r10, r11, r12, r13, r14, r15;
940              
941 0         0 $$p{address}->setReg($address); # Address of string
942 0         0 Mov $s, -1; Mov $S, -1; Mov $index, 0; # Initial states, position
  0         0  
  0         0  
943              
944             $$p{size}->for(sub # Each character in expression
945 0         0 {my ($indexVariable, $start, $next, $end) = @_;
946              
947 0         0 $indexVariable->setReg($index);
948 0         0 getLexicalCode; # Current lexical code
949              
950             AndBlock # Trap space before new line and detect new line after ascii
951 0         0 {my ($end, $start) = @_;
952 0         0 Cmp $index, 0; Je $end; # Start beyond the first character so we can look back one character.
  0         0  
953 0         0 Cmp $eb, $Ascii; Jne $end; # Current is ascii
  0         0  
954              
955 0         0 Mov $w1, "[$address+$indexScale*$index-$indexScale+$lexCodeOffset]"; # Previous lexical code
956 0         0 Cmp $w1, $Ascii; Jne $end; # Previous is ascii
  0         0  
957              
958 0         0 if (1) # Check for 's' followed by 'n' and 'a' followed by 'n'
959 0         0 {Mov $w1, "[$address+$indexScale*$index-$indexScale]"; # Previous character
960 0         0 getAlpha $w2; # Current character
961              
962 0         0 Cmp $w1, $asciiSpace; # Check for space followed by new line
963             IfEq
964             Then
965 0         0 {Cmp $w2, $asciiNewLine;
966             IfEq # Disallow 's' followed by 'n'
967             Then
968 0         0 {PrintErrStringNL "Space detected before new line at index:";
969 0         0 PrintErrRegisterInHex $index;
970 0         0 PrintErrTraceBack;
971 0         0 Exit(1);
972 0         0 };
973 0         0 };
974              
975 0         0 Cmp $w1, $asciiSpace; Je $end; # Previous is 's' so this cannot be 'a' followed by 'n'
  0         0  
976 0         0 Cmp $w1, $asciiNewLine; Je $end; # Previous is 'n' so this cannot be 'a' followed by 'n'
  0         0  
977 0         0 Cmp $w2, $asciiNewLine; Jne $end; # Current is 'n'
  0         0  
978              
979 0         0 putLexicalCode $WhiteSpace; # Mark new line as significant
980             }
981 0         0 };
982              
983             AndBlock # Spaces and new lines between other ascii
984 0         0 {my ($end, $start) = @_;
985 0         0 Cmp $s, -1;
986             IfEq # Looking for opening ascii
987             Then
988 0         0 {Cmp $eb, $Ascii; Jne $end; # Not ascii
  0         0  
989 0         0 getAlpha $cb; # Current character
990 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
991 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
992             IfEq # NOTE(review): follows a Je on the same comparison so this block looks unreachable when the comparison is equal and skipped when it is not - confirm that $s can ever be set here
993             Then
994 0         0 {Mov $s, $index; Inc $s; # Ascii not space nor new line
  0         0  
995 0         0 };
996 0         0 Jmp $end;
997             },
998             Else # Looking for closing ascii
999 0         0 {Cmp $eb, $Ascii;
1000             IfNe # Not ascii
1001             Then
1002 0         0 {Mov $s, -1;
1003 0         0 Jmp $end
1004 0         0 };
1005 0         0 getAlpha $cb; # Current character
1006 0         0 Cmp $cb, $asciiNewLine; Je $end; # Skip over new lines
  0         0  
1007 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip over spaces
  0         0  
1008              
1009             For # Move over spaces and new lines between two ascii characters that are neither of new line or space
1010 0         0 {my ($start, $end, $next) = @_;
1011 0         0 getAlpha $cb, $s; # Check for 's' or 'n' - see the note on the local getAlpha above about whether $s is actually used as the index
1012 0         0 Cmp $cb, $asciiSpace;
1013             IfEq
1014             Then
1015 0         0 {putLexicalCode $WhiteSpace, $s; # Mark as significant white space.
1016 0         0 Jmp $next;
1017 0         0 };
1018 0         0 Cmp $cb, $asciiNewLine;
1019             IfEq
1020             Then
1021 0         0 {putLexicalCode $WhiteSpace; # Mark as significant new line. NOTE(review): no index register is supplied so this writes at $index unlike the space case above which supplies $s - confirm
1022 0         0 Jmp $next;
1023 0         0 };
1024 0         0 } $s, $index;
1025              
1026 0         0 Mov $s, $index; Inc $s; # Restart the search from the character after the current one
  0         0  
1027 0         0 };
1028 0         0 };
1029              
1030             AndBlock # Note: 's' preceding 'a' are significant
1031 0         0 {my ($end, $start) = @_;
1032 0         0 Cmp $S, -1;
1033             IfEq # Looking for 's'
1034             Then
1035 0         0 {Cmp $eb, $Ascii; # Not 'a'
1036             IfNe
1037             Then
1038 0         0 {Mov $S, -1;
1039 0         0 Jmp $end
1040 0         0 };
1041 0         0 getAlpha $cb; # Actual character in alphabet
1042 0         0 Cmp $cb, $asciiSpace; # Space
1043             IfEq
1044             Then
1045 0         0 {Mov $S, $index; # Remember where this run of spaces starts
1046 0         0 Jmp $end;
1047 0         0 };
1048             },
1049             Else # Looking for 'a'
1050 0         0 {Cmp $eb, $Ascii; # Not 'a'
1051             IfNe
1052             Then
1053 0         0 {Mov $S, -1;
1054 0         0 Jmp $end
1055 0         0 };
1056 0         0 getAlpha $cb; # Actual character in alphabet
1057 0         0 Cmp $cb, $asciiSpace; Je $end; # Skip 's'
  0         0  
1058              
1059 0         0 Cmp $cb, $asciiNewLine;
1060             IfEq # New lines prevent 's' from preceding 'a'
1061             Then
1062 0         0 {Mov $s, -1; # NOTE(review): resets $s although this block tracks its state in $S - confirm which register was intended
1063 0         0 Jmp $end
1064 0         0 };
1065              
1066             For # Move over spaces to non space ascii
1067 0         0 {my ($start, $end, $next) = @_;
1068 0         0 putLexicalCode $WhiteSpace, $S; # Mark the character at $S as significant white space - the loop runs $S up to the current character
1069 0         0 } $S, $index;
1070 0         0 Mov $S, -1; # Look for next possible space
1071             }
1072 0         0 };
  0         0  
1073 0         0 });
1074              
1075             $$p{size}->for(sub # Invert white space so that significant white space becomes ascii and the remainder is ignored
1076 0         0 {my ($indexVariable, $start, $next, $end) = @_;
1077              
1078 0         0 $indexVariable->setReg($index);
1079 0         0 getLexicalCode; # Current lexical code
1080              
1081             AndBlock # Invert non significant white space
1082 0         0 {my ($end, $start) = @_;
1083 0         0 Cmp $eb, $Ascii;
1084 0         0 Jne $end; # Ascii
1085              
1086 0         0 getAlpha $cb; # Actual character in alphabet
1087 0         0 Cmp $cb, $asciiSpace;
1088             IfEq
1089             Then
1090 0         0 {putLexicalCode $WhiteSpace; # Mark space as not significant
1091 0         0 Jmp $next; # Go straight to the next character so the block below does not turn it back into ascii
1092 0         0 };
1093 0         0 Cmp $cb, $asciiNewLine;
1094             IfEq
1095             Then
1096 0         0 {putLexicalCode $WhiteSpace; # Mark new line as not significant
1097 0         0 Jmp $next;
1098 0         0 };
1099 0         0 };
1100              
1101             AndBlock # Mark significant white space
1102 0         0 {my ($end, $start) = @_;
1103 0         0 Cmp $eb, $WhiteSpace; Jne $end; # Not significant white space
  0         0  
1104 0         0 putLexicalCode $Ascii; # Mark as ascii
1105 0         0 };
1106 0         0 });
1107              
1108 0         0 PopR;
1109 0         0 } [qw(address size)], name => q(Unisyn::Parse::ClassifyWhiteSpace);
1110              
1111 0         0 $s->call(@parameters);
1112             } # ClassifyWhiteSpace
1113              
1114             sub reload($$) #P Reload the variables associated with a parse.
1115 0     0 1 0 {my ($parse, $parameters) = @_; # Parse, hash of variable parameters
1116 0 0       0 @_ >= 1 or confess "One or more parameters";
1117              
1118             $parse->quarks->reload (arena => $$parameters{bs}, # Reload the quarks because the quarks used to create this subroutine might not be the same as the quarks that are reusing it now.
1119             array => $$parameters{numbersToStringsFirst},
1120 0         0 tree => $$parameters{stringsToNumbersFirst});
1121              
1122             $parse->operators->reload(arena => $$parameters{bs}, # Reload the subQuarks because the subQuarks used to create this subroutine might not be the same as the subQuarks that are reusing it now.
1123             array => $$parameters{opNumbersToStringsFirst},
1124 0 0       0 tree => $$parameters{opStringsToNumbersFirst}) if $parse->operators; # Skipped when the parse has no operators
1125             }
1126              
1127             sub parseUtf8($@) #P Parse a unisyn expression encoded as utf8 and return the parse tree.
1128 0     0 1 0 {my ($parse, @parameters) = @_; # Parse, parameters
1129 0 0       0 @_ >= 1 or confess "One or more parameters";
1130              
1131             my $s = Subroutine
1132 0         0 {my ($p, $s) = @_; # Parameters
1133 0         0 $ParseUtf8SubDef = $s; # Save the sub definition globally so that we can forward its parameter list to L.
1134              
1135 0         0 $parse->reload($p); # Reload the parse description
1136 0 0       0 PrintErrStringNL "ParseUtf8" if $debug;
1137              
1138 0         0 PushR $parseStackBase, map {"r$_"} 8..15; # Save the parse stack base register and r8 to r15
  0         0  
1139 0         0 PushZmm 0..1; PushMask 0..2; # Used to hold arena and classifiers. Zmm0 is used to as a short string to quark the lexical item strings.
  0         0  
1140              
1141 0         0 my $source32 = $$p{source32};
1142 0         0 my $sourceSize32 = $$p{sourceSize32};
1143 0         0 my $sourceLength32 = $$p{sourceLength32};
1144              
1145             ConvertUtf8ToUtf32 u8 => $$p{address}, size8 => $$p{size}, # Convert to utf32
1146 0         0 u32 => $source32, size32 => $sourceSize32,
1147             count => $sourceLength32;
1148              
1149             my sub PrintUtf32($$) # Print a utf32 string in hexadecimal
1150 0         0 {my ($size, $address) = @_; # Variable size, variable address
1151 0         0 $address->printErrMemoryInHexNL($size);
1152             }
1153              
1154 0 0       0 if ($debug)
1155 0         0 {PrintErrStringNL "After conversion from utf8 to utf32";
1156 0         0 $sourceSize32 ->errNL("Output Length: "); # Write output length
1157 0         0 PrintUtf32($sourceSize32, $source32); # Print utf32
1158             }
1159              
1160 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{lexicalLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1161 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{lexicalHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1162              
1163 0         0 ClassifyWithInRangeAndSaveOffset address=>$source32, size=>$sourceLength32; # Alphabetic classification
1164 0 0       0 if ($debug)
1165 0         0 {PrintErrStringNL "After classification into alphabet ranges";
1166 0         0 PrintUtf32($sourceSize32, $source32); # Print classified utf32
1167             }
1168              
1169 0         0 Vmovdqu8 zmm0, "[".Rd(join ', ', $Lex->{bracketsLow} ->@*)."]"; # Each double is [31::24] Classification, [21::0] Utf32 start character
1170 0         0 Vmovdqu8 zmm1, "[".Rd(join ', ', $Lex->{bracketsHigh}->@*)."]"; # Each double is [31::24] Range offset, [21::0] Utf32 end character
1171              
1172 0         0 ClassifyWithInRange address=>$source32, size=>$sourceLength32; # Bracket classification
1173 0 0       0 if ($debug)
1174 0         0 {PrintErrStringNL "After classification into brackets";
1175 0         0 PrintUtf32($sourceSize32, $source32); # Print classified brackets
1176             }
1177              
1178 0         0 my $opens = V(opens, -1);
1179 0         0 MatchBrackets address=>$source32, size=>$sourceLength32, $opens, $$p{fail}; # Match brackets - the last two arguments are passed without a keyword, presumably they are matched to the 'opens' and 'fail' parameters by variable name, confirm against the subroutine call mechanism
1180 0 0       0 if ($debug)
1181 0         0 {PrintErrStringNL "After bracket matching";
1182 0         0 PrintUtf32($sourceSize32, $source32); # Print matched brackets
1183             }
1184              
1185 0         0 ClassifyWhiteSpace address=>$source32, size=>$sourceLength32; # Classify white space
1186 0 0       0 if ($debug)
1187 0         0 {PrintErrStringNL "After white space classification";
1188 0         0 PrintUtf32($sourceSize32, $source32);
1189             }
1190              
1191 0         0 ClassifyNewLines address=>$source32, size=>$sourceLength32; # Classify new lines
1192 0 0       0 if ($debug)
1193 0         0 {PrintErrStringNL "After classifying new lines";
1194 0         0 PrintUtf32($sourceSize32, $source32);
1195             }
1196              
1197 0         0 $$p{source32} ->setReg($start); # Start of expression string after it has been classified
1198 0         0 $$p{sourceLength32}->setReg($size); # Number of characters in the expression
1199 0         0 Mov $parseStackBase, rsp; # Set base of parse stack
1200              
1201 0         0 parseExpression; # Parse the expression
1202              
1203 0         0 $$p{parse}->getReg(r15); # Save the offset of the parse tree that parseExpression leaves in r15
1204 0         0 Mov rsp, $parseStackBase; # Remove parse stack
1205              
1206 0 0       0 $$p{parse}->errNL if $debug;
1207              
1208 0         0 PopMask; PopZmm; PopR; # Restore in the reverse order of the saves made on entry
  0         0  
  0         0  
1209              
1210             }
1211 0         0 [qw(bs address size parse fail source32 sourceSize32 sourceLength32),
1212             qw(numbersToStringsFirst stringsToNumbersFirst),
1213             qw(opNumbersToStringsFirst opStringsToNumbersFirst)],
1214             name => q(Unisyn::Parse::parseUtf8);
1215              
1216 0         0 my $op = $parse->operators; # The operator methods if supplied
1217 0         0 my $zero = K(zero, 0); # Stands in for the operator quark parameters when no operators were supplied
1218              
1219 0 0       0 $s->call # Parameterize the parse
    0          
1220             (bs => $parse->arena->bs,
1221             address => $parse->address8,
1222             fail => $parse->fails,
1223             parse => $parse->parse,
1224             size => $parse->size8,
1225             source32 => $parse->source32,
1226             sourceLength32 => $parse->sourceLength32,
1227             sourceSize32 => $parse->sourceSize32,
1228             numbersToStringsFirst => $parse->quarks->numbersToStrings->first,
1229             stringsToNumbersFirst => $parse->quarks->stringsToNumbers->first,
1230             opNumbersToStringsFirst => $op ? $op->numbersToStrings->first : $zero,
1231             opStringsToNumbersFirst => $op ? $op->stringsToNumbers->first : $zero,
1232             );
1233             } # parseUtf8
1234              
1235             #D1 Traverse # Traverse the parse tree
1236              
# traverseParseTree: builds a recursive generated Subroutine that, for the term addressed by (bs, first):
#  - checks that the node has a lexical type entry and that the entry is a term, printing a message and exiting otherwise;
#  - reads the operand count from key 1 and recurses into every operand whose lexical type is itself a term;
#  - then looks up the $opSub entry and, when it is present and greater than zero, dispatches to it through r15.
# Operands are visited before the term's own subroutine is dispatched, which gives the post order named below.
1237             sub traverseParseTree($) # Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
1238 0     0 1 0 {my ($parse) = @_; # Parse tree
1239              
1240             my $s = Subroutine # Traverse a tree
1241 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1242 0         0 my $t = Nasm::X86::DescribeTree (arena=>$$p{bs}, first=>$$p{first}); # Describe the tree identified by the bs and first parameters
1243 0         0 $t->find(K(key, $opType)); # The lexical type of the element - normally a term
1244              
1245             If $t->found == 0, # Not found lexical type of element
1246             Then
1247 0         0 {PrintOutString "No type for node";
1248 0         0 Exit(1);
1249 0         0 };
1250              
1251             If $t->data != $term, # Expected a term
1252             Then
1253 0         0 {PrintOutString "Expected a term";
1254 0         0 Exit(1);
1255 0         0 };
1256              
1257 0         0 my $operands = V(operands); # Number of operands
1258 0         0 $t->find(K(key, 1)); # Key 1 tells us the number of operands
1259             If $t->found > 0, # Found key 1
1260             Then
1261 0         0 {$operands->copy($t->data); # Number of operands
1262             },
1263             Else
1264 0         0 {PrintOutString "Expected at least one operand";
1265 0         0 Exit(1);
1266 0         0 };
1267              
1268             $operands->for(sub # Each operand
1269 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1270 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail: keys for operand $index start at (1 + $index) * $lexItemWidth
1271 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1272 0         0 $t->find($i+$lexItemOffset); my $off = V(key) ->copy($t->data); # Offset of first block of sub tree
  0         0  
1273              
1274             If $lex == $term, # Term
1275             Then
1276 0         0 {$s->call($$p{bs}, first => $off); # Traverse sub tree referenced by offset field
1277 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1278             },
1279 0         0 });
  0         0  
1280              
1281 0         0 $t->find(K(key, $opSub)); # The subroutine for the term
1282             If $t->found > 0, # Found subroutine for term
1283             Then # Call subroutine for this term
1284             {#PushR r15, zmm0;
1285             my $p = Subroutine # Prototype subroutine to establish parameter list - note that this $p shadows the parameters $p of the enclosing sub inside this block
1286 0         0 {} [qw(tree call)], with => $s,
1287             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::prototype";
1288              
1289             my $d = Subroutine # Dispatcher
1290 0         0 {my ($q, $sub) = @_;
1291 0         0 $p->dispatchV($$q{call}, r15); # Dispatch to the address held in the call parameter using r15
1292 0         0 } [], with => $p,
1293             name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem::dispatch";
1294              
1295             If $t->data > 0, # Only dispatch when the stored subroutine value is greater than zero
1296             Then
1297 0         0 {$d->call(tree => $t->first, call => $t->data) # Call sub associated with the lexical item
1298 0         0 };
1299             # my $p = Subroutine # Subroutine
1300             # {my ($parameters) = @_; # Parameters
1301             # $$parameters{call}->setReg(r15);
1302             # Call r15;
1303             # } [qw(tree call)], with => $s,
1304             # name => __PACKAGE__."TraverseParseTree::ProcessLexicalItem";
1305             #
1306             # my $l = RegisterSize rax;
1307             # $$p{bs} ->putQIntoZmm(0, 0*$l, r15);
1308             # $$p{first}->putQIntoZmm(0, 1*$l, r15);
1309             # $t->data ->setReg(r15);
1310             # Call r15;
1311             # #PopR;
1312 0         0 };
1313              
1314 0         0 } [qw(bs first)], name => "Nasm::X86::Tree::traverseParseTree";
1315              
1316 0         0 PushR r15, zmm0; # Save the registers named here around the top level call
1317 0         0 $s->call($parse->arena->bs, first => $parse->parse); # Start the traversal at the root recorded in the parse
1318 0         0 PopR;
1319              
1320 0         0 $a # NOTE(review): returns the package variable $a, which nothing in this sub assigns - confirm whether a return value is intended here
1321             } # traverseParseTree
1322              
1323             #D1 Print # Print a parse tree
1324              
# printLexicalItem: parameters are the parse tree, $source32 (loaded into r14 and used as the base address of the utf32 source),
# $offset (index of the lexical item from that base in 4 byte units) and $size (the number of characters of the item to print).
# The lexical type is taken from the top byte of the first dword of the item. Brackets are printed as an open/close pair;
# for the other known types an alphabet is selected into r12 and each character of the item is printed via that alphabet.
1325             sub printLexicalItem($$$$) #P Print the utf8 string corresponding to a lexical item at a variable offset.
1326 0     0 1 0 {my ($parse, $source32, $offset, $size) = @_; # Parse tree, B
of utf32 source representation, B to lexical item in utf32, B in utf32 chars of item
1327 0         0 my $t = $parse->arena->DescribeTree; # NOTE(review): $t is not referenced again in this sub
1328              
1329             my $s = Subroutine
1330 0     0   0 {my ($p, $s) = @_; # Parameters
1331 0         0 PushR r12, r13, r14, r15;
1332              
1333 0         0 $$p{source32}->setReg(r14);
1334 0         0 $$p{offset} ->setReg(r15);
1335 0         0 Lea r13, "[r14+4*r15]"; # Address lexical item
1336 0         0 Mov eax, "[r13]"; # First lexical item clearing rax
1337 0         0 Shr rax, 24; # First lexical item type in lowest byte and all else cleared
1338              
1339 0         0 my $success = Label; # Jumped to once the item has been printed
1340 0         0 my $print = Label; # Jumped to once an alphabet has been selected into r12
1341              
1342 0         0 Cmp rax, $bracketsBase; # Test for brackets
1343             IfGe
1344             Then
1345 0         0 {my $o = $Lex->{bracketsOpen}; # Opening brackets
1346 0         0 my $c = $Lex->{bracketsClose}; # Closing brackets
1347 0         0 my $O = Rutf8 map {($_, chr(0))} @$o; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1348 0         0 my $C = Rutf8 map {($_, chr(0))} @$c; # Brackets in 3 bytes of utf8 each, with each bracket followed by a zero to make 4 bytes which is more easily addressed
  0         0  
1349 0         0 Mov r14, $O; # Address open bracket
1350 0         0 Mov r15, rax; # The bracket number
1351 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Index to bracket
1352 0         0 PrintOutUtf8Char; # Print opening bracket
1353 0         0 Mov r14, $C; # Address close bracket
1354 0         0 Lea rax, "[r14+4*r15 - 4*$bracketsBase-4]"; # Closing brackets occupy 3 bytes
1355 0         0 PrintOutUtf8Char; # Print closing bracket
1356 0         0 Jmp $success;
1357 0         0 };
1358              
1359 0         0 Mov r12, -1; # Alphabet to use
1360 0         0 Cmp rax, $variable; # Test for variable
1361             IfEq
1362             Then
1363 0         0 {my $b = $Lex->{alphabetsOrdered}{variable}; # Load variable alphabet in dwords
1364 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1365 0         0 my $a = Rd @b;
1366 0         0 Mov r12, $a;
1367 0         0 Jmp $print;
1368 0         0 };
1369              
1370 0         0 Cmp rax, $assign; # Assign operator
1371             IfEq
1372             Then
1373 0         0 {my $b = $Lex->{alphabetsOrdered}{assign};
1374 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1375 0         0 my $a = Rd @b;
1376 0         0 Mov r12, $a;
1377 0         0 Jmp $print;
1378 0         0 };
1379              
1380 0         0 Cmp rax, $dyad; # Dyad
1381             IfEq
1382             Then
1383 0         0 {my $b = $Lex->{alphabetsOrdered}{dyad};
1384 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1385 0         0 my $a = Rd @b;
1386 0         0 Mov r12, $a;
1387 0         0 Jmp $print;
1388 0         0 };
1389              
1390 0         0 Cmp rax, $Ascii; # Ascii
1391             IfEq
1392             Then
1393 0         0 {my $b = $Lex->{alphabetsOrdered}{Ascii};
1394 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1395 0         0 my $a = Rd @b;
1396 0         0 Mov r12, $a;
1397 0         0 Jmp $print;
1398 0         0 };
1399              
1400 0         0 Cmp rax, $prefix; # Prefix
1401             IfEq
1402             Then
1403 0         0 {my $b = $Lex->{alphabetsOrdered}{prefix};
1404 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1405 0         0 my $a = Rd @b;
1406 0         0 Mov r12, $a;
1407 0         0 Jmp $print;
1408 0         0 };
1409              
1410 0         0 Cmp rax, $suffix; # Suffix
1411             IfEq
1412             Then
1413 0         0 {my $b = $Lex->{alphabetsOrdered}{suffix};
1414 0         0 my @b = map {convertUtf32ToUtf8LE $_} @$b;
  0         0  
1415 0         0 my $a = Rd @b;
1416 0         0 Mov r12, $a;
1417 0         0 Jmp $print;
1418 0         0 };
1419              
1420 0         0 PrintErrTraceBack; # Unknown lexical type
1421 0         0 PrintErrStringNL "Alphabet not found for unexpected lexical item";
1422 0         0 PrintErrRegisterInHex rax;
1423 0         0 Exit(1);
1424              
1425 0         0 SetLabel $print; # Decoded
1426              
1427             $$p{size}->for(sub # Write each letter of the item out - r13 addresses the item in the utf32 source
1428 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1429 0         0 $index->setReg(r14); # Index of the current character within the item
1430 0         0 ClearRegisters r15; # Next instruction does not clear the entire register
1431 0         0 Mov r15b, "[r13+4*r14]"; # Load the low byte of the dword for this character - used below as the index into the alphabet
1432 0         0 Shl r15, 2; # Each letter is 4 bytes wide in utf8
1433 0         0 Lea rax, "[r12+r15]"; # Address alphabet letter as utf8
1434 0         0 PrintOutUtf8Char; # Print utf8 character
1435 0         0 });
1436              
1437 0         0 SetLabel $success; # Done
1438              
1439 0         0 PopR;
1440 0         0 } [qw(offset source32 size)],
1441             name => q(Unisyn::Parse::printLexicalItem);
1442              
1443 0         0 $s->call(offset => $offset, source32 => $source32, size => $size);
1444             }
1445              
# print: builds a recursive generated Subroutine that walks the term addressed by (bs, first) and writes one line per operand.
# Sub terms are printed as "Term" followed by their own operands at a deeper indentation; semicolons are printed by name only;
# every other lexical type is printed as a label followed by the text of the item via printLexicalItem.
# r12 holds the current indentation depth for the duration of the print and is saved and restored by PushR/PopR.
1446             sub print($) # Print a parse tree.
1447 0     0 1 0 {my ($parse) = @_; # Parse tree
1448 0         0 my $t = $parse->arena->DescribeTree; # Tree descriptor shared by every invocation of the recursive sub below
1449              
1450 0         0 PushR my ($depthR) = (r12); # Recursion depth
1451              
1452             my $b = Subroutine # Print the spacing blanks to offset sub trees
1453             {V(loop, $depthR)->for(sub
1454 0         0 {PrintOutString " ";
1455 0     0   0 });
1456 0         0 } [], name => "Nasm::X86::Tree::dump::spaces";
1457              
1458             my $s = Subroutine # Print a tree
1459 0     0   0 {my ($p, $s) = @_; # Parameters, sub definition
1460              
1461 0         0 my $B = $$p{bs}; # Kept so that it can be passed on in the recursive call below
1462              
1463 0         0 $t->address->copy($$p{bs}); # Point the shared descriptor at the arena passed in
1464 0         0 $t->first ->copy($$p{first}); # and at the first block of the tree to print
1465 0         0 $t->find(K(key, 0)); # Key 0 tells us the type of the element - normally a term
1466              
1467             If $t->found == 0, # Not found key 0
1468             Then
1469 0         0 {PrintOutString "No type for node";
1470 0         0 Exit(1);
1471 0         0 };
1472              
1473             If $t->data != $term, # Expected a term
1474             Then
1475 0         0 {PrintOutString "Expected a term";
1476 0         0 Exit(1);
1477 0         0 };
1478              
1479 0         0 my $operands = V(operands); # Number of operands
1480 0         0 $t->find(K(key, 1)); # Key 1 tells us the number of operands
1481             If $t->found > 0, # Found key 1
1482             Then
1483 0         0 {$operands->copy($t->data); # Number of operands
1484             },
1485             Else
1486 0         0 {PrintOutString "Expected at least one operand";
1487 0         0 Exit(1);
1488 0         0 };
1489              
1490             $operands->for(sub # Each operand
1491 0         0 {my ($index, $start, $next, $end) = @_; # Execute body
1492 0         0 my $i = (1 + $index) * $lexItemWidth; # Operand detail
1493 0         0 $t->find($i+$lexItemType); my $lex = V(key) ->copy($t->data); # Lexical type
  0         0  
1494 0         0 $t->find($i+$lexItemOffset); my $off = V(data)->copy($t->data); # Offset in source
  0         0  
1495 0         0 $t->find($i+$lexItemLength); my $len = V(data)->copy($t->data); # Length in source
  0         0  
1496              
1497 0         0 $b->call; # Indent
1498              
1499             If $lex == $term, # Term
1500             Then
1501 0         0 {PrintOutStringNL "Term";
1502 0         0 Inc $depthR; # Increase indentation for sub terms
1503 0         0 $s->call($B, first => $off, $$p{source32}); # Print sub tree referenced by offset field
1504 0         0 Dec $depthR; # Restore existing indentation
1505 0         0 $t->first ->copy($$p{first}); # Re-establish addressability to the tree after the recursive call
1506             },
1507              
1508 0         0 Ef {$lex == $semiColon} # Semicolon
1509             Then
1510 0         0 {PrintOutStringNL "Semicolon";
1511             },
1512              
1513             Else
1514             {If $lex == $variable, # Variable
1515             Then
1516 0         0 {PrintOutString "Variable: ";
1517             },
1518              
1519 0         0 Ef {$lex == $assign} # Assign
1520             Then
1521 0         0 {PrintOutString "Assign: ";
1522             },
1523              
1524 0         0 Ef {$lex == $prefix} # Prefix
1525             Then
1526 0         0 {PrintOutString "Prefix: ";
1527             },
1528              
1529 0         0 Ef {$lex == $suffix} # Suffix
1530             Then
1531 0         0 {PrintOutString "Suffix: ";
1532             },
1533              
1534 0         0 Ef {$lex == $dyad} # Dyad
1535             Then
1536 0         0 {PrintOutString "Dyad: ";
1537             },
1538              
1539 0         0 Ef {$lex == $Ascii} # Ascii
1540             Then
1541 0         0 {PrintOutString "Ascii: ";
1542             },
1543              
1544             Else # Brackets
1545 0         0 {PrintOutString "Brackets: ";
1546 0         0 };
1547              
1548 0         0 $parse->printLexicalItem($$p{source32}, $off, $len); # Print the text of the lexical item, whatever its type, after the label written above
1549 0         0 PrintOutNL;
1550 0         0 };
1551              
1552             If $index == 0, # Operator followed by indented operands
1553             Then
1554 0         0 {Inc $depthR;
1555 0         0 };
1556 0         0 });
1557              
1558 0         0 Dec $depthR; # Reset indentation after operands
1559 0         0 } [qw(bs first source32)], name => "Nasm::X86::Tree::print";
1560              
1561 0         0 ClearRegisters $depthR; # Depth starts at zero
1562              
1563 0         0 $s->call($parse->arena->bs, first => $parse->parse, $parse->source32); # Print starting at the root recorded in the parse
1564              
1565 0         0 PopR;
1566             } # print
1567              
1568             sub dumpParseTree($) # Dump the parse tree.
1569 0     0 1 0 {my ($parse) = @_; # Parse tree
1570 0         0 my $t = $parse->arena->DescribeTree; # Describe a tree in the arena used by the parse
1571 0         0 $t->first->copy($parse->parse); # Start the description at the root recorded in the parse
1572 0         0 $t->dump; # Dump using the tree's own dump method
1573             }
1574              
1575             #D1 Execute # Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
1576              
1577             sub lexToSub($$$$) # Map a lexical item to a processing subroutine.
1578 0     0 1 0 {my ($parse, $alphabet, $op, $sub) = @_; # Parse tree whose operators receive the subroutine, the name of the alphabet, the operator name in that alphabet, subroutine definition
1579 0         0 my $a = &lexicalData->{alphabetsOrdered}{$alphabet}; # Alphabet
1580 0         0 my $n = $$Lex{lexicals}{$alphabet}{number}; # Number of lexical type
1581 0         0 my %i = map {$$a[$_]=>$_} keys @$a; # Map each code point in the alphabet to its index in the alphabet
  0         0  
1582 0         0 my @b = ($n, map {$i{ord $_}} split //, $op); # Bytes representing the operator name - a character of $op that is not in the alphabet yields undef here
  0         0  
1583 0         0 my $s = join '', map {chr $_} @b; # String representation
  0         0  
1584 0         0 $parse->operators->putSub($s, $sub); # Add the string, subroutine combination to the sub quarks
1585             }
1586              
1587             sub dyad($$$) # Define a method for a dyadic operator.
1588 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree passed on to lexToSub, the name of the operator as a utf8 string, associated subroutine definition
1589 0         0 $parse->lexToSub("dyad", $text, $sub); # Register under the dyad alphabet
1590             }
1591              
1592             sub assign($$$) # Define a method for an assign operator.
1593 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree passed on to lexToSub, the name of the operator as a utf8 string, associated subroutine definition
1594 0         0 $parse->lexToSub("assign", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1595             }
1596              
1597             sub prefix($$$) # Define a method for a prefix operator.
1598 0     0 1 0 {my ($parse, $text, $sub) = @_; # Parse tree passed on to lexToSub, the name of the operator as a utf8 string, associated subroutine definition
1599 0         0 $parse->lexToSub("prefix", $text, $sub); # Operator name in operator alphabet preceded by alphabet number
1600             }
1601              
# Register $sub as the handler for the suffix operator named $text.
# Earlier this sub ignored $text and stored $sub under the lexical number of a variable,
# which replaced the handler installed by variable() instead of defining a suffix operator.
# It now follows the same route as dyad(), assign() and prefix().
sub suffix($$$) # Define a method for a suffix operator.
 {my ($parse, $text, $sub) = @_; # Parse tree passed on to lexToSub, the name of the operator as a utf8 string, associated subroutine definition
  $parse->lexToSub("suffix", $text, $sub); # Operator name in the suffix alphabet preceded by the lexical number of a suffix
 }
1607              
1608              
1609             sub ascii($$) # Define a method for ascii text.
1610 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the subroutine, associated subroutine definition
1611 0         0 my $n = $$Lex{lexicals}{Ascii}{number}; # Lexical number of ascii
1612 0         0 $parse->operators->putSub(chr($n), $sub); # Add the ascii subroutine to the sub quarks - the key is just the lexical number as a single character
1613             }
1614              
1615             sub semiColon($$) # Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
1616 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the subroutine, associated subroutine definition
1617 0         0 my $n = $$Lex{lexicals}{semiColon}{number}; # Lexical number of semicolon
1618 0         0 $parse->operators->putSub(chr($n), $sub); # Add the semicolon subroutine to the sub quarks
1619 0         0 my $N = $$Lex{lexicals}{NewLineSemiColon}{number}; # New line semi colon
1620 0         0 $parse->operators->putSub(chr($N), $sub); # Add the same subroutine to the sub quarks for the new line form
1621             }
1622              
1623             sub variable($$) # Define a method for a variable.
1624 0     0 1 0 {my ($parse, $sub) = @_; # Parse tree whose operators receive the subroutine, associated subroutine definition
1625 0         0 my $n = $$Lex{lexicals}{variable}{number}; # Lexical number of a variable
1626 0         0 $parse->operators->putSub(chr($n), $sub); # Add the variable subroutine to the sub quarks - the key is just the lexical number as a single character
1627             }
1628              
1629             sub bracket($$$) # Define a method for a bracket operator.
1630 0     0 1 0 {my ($parse, $open, $sub) = @_; # Parse tree whose operators receive the subroutine, opening parenthesis, associated subroutine
1631 0         0 my $l = &lexicalData;
1632 0         0 my $s = join '', sort $l->{bracketsOpen}->@*;#, $l->{bracketsClose}->@*; # Bracket alphabet
1633 0         0 my $b = index($s, $open); # Position of the opening bracket in the sorted string of opening brackets
1634 0 0       0 $b < 0 and confess "No such bracket: $open";
1635 0         0 my $n = $$Lex{lexicals}{OpenBracket}{number}; # Lexical number of open bracket
1636 0         0 $parse->operators->putSub(chr($n).chr($b+1+$l->{bracketsBase}), $sub); # Why plus one? # Add the brackets subroutine to the sub quarks
1637             }
1638              
1639             #D1 Alphabets # Translate between alphabets.
1640              
1641             sub showAlphabet($) #P Show an alphabet.
1642 0     0 1 0 {my ($alphabet) = @_; # Alphabet name
1643 0         0 my $out; # Characters of the alphabet accumulated so far - remains undefined if the alphabet has no entries
1644 0         0 my $lex = &lexicalData;
1645 0         0 my $abc = $lex->{alphabetsOrdered}{$alphabet}; # Code points of the alphabet in order
1646 0         0 for my $a(@$abc)
1647 0         0 {$out .= chr($a); # Append the character for each code point
1648             }
1649             $out
1650 0         0 }
1651              
1652             sub asciiToAssignLatin($) # Translate ascii to the corresponding letters in the assign latin alphabet.
1653 0     0 1 0 {my ($in) = @_; # A string of ascii
1654 1     1   16488 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧/r; # /r returns the translated copy and leaves $in alone; characters outside A-Z and a-z pass through unchanged
  1         2  
  1         16  
  0         0  
1655             }
1656              
1657             sub asciiToAssignGreek($) # Translate ascii to the corresponding letters in the assign greek alphabet.
1658 0     0 1 0 {my ($in) = @_; # A string of ascii
1659 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝛢𝛣𝛤𝛥𝛦𝛧𝛨𝛩𝛪𝛫𝛬𝛭𝛮𝛯𝛰𝛱𝛲𝛳𝛴𝛵𝛶𝛷𝛸𝛹𝛺𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔/r; # Each listed ascii letter maps by position to a greek letter; /r returns the translated copy and unlisted characters pass through unchanged
1660             }
1661              
1662             sub asciiToDyadLatin($) # Translate ascii to the corresponding letters in the dyad latin alphabet.
1663 0     0 1 0 {my ($in) = @_; # A string of ascii
1664 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳/r; # /r returns the translated copy and leaves $in alone; characters outside A-Z and a-z pass through unchanged
1665             }
1666              
1667             sub asciiToDyadGreek($) # Translate ascii to the corresponding letters in the dyad greek alphabet.
1668 0     0 1 0 {my ($in) = @_; # A string of ascii
1669 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝚨𝚩𝚪𝚫𝚬𝚭𝚮𝚯𝚰𝚱𝚲𝚳𝚴𝚵𝚶𝚷𝚸𝚹𝚺𝚻𝚼𝚽𝚾𝚿𝛀𝛂𝛃𝛄𝛅𝛆𝛇𝛈𝛉𝛊𝛋𝛌𝛍𝛎𝛏𝛐𝛑𝛒𝛓𝛔𝛕𝛖𝛗𝛘𝛙𝛚/r; # Each listed ascii letter maps by position to a greek letter; /r returns the translated copy and unlisted characters pass through unchanged
1670             }
1671              
1672             sub asciiToPrefixLatin($) # Translate ascii to the corresponding letters in the prefix latin alphabet.
1673 0     0 1 0 {my ($in) = @_; # A string of ascii
1674 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛/r; # /r returns the translated copy and leaves $in alone; characters outside A-Z and a-z pass through unchanged
1675             }
1676              
1677             sub asciiToPrefixGreek($) # Translate ascii to the corresponding letters in the prefix greek alphabet.
1678 0     0 1 0 {my ($in) = @_; # A string of ascii
1679 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝜜𝜝𝜞𝜟𝜠𝜡𝜢𝜣𝜤𝜥𝜦𝜧𝜨𝜩𝜪𝜫𝜬𝜭𝜮𝜯𝜰𝜱𝜲𝜳𝜴𝜶𝜷𝜸𝜹𝜺𝜻𝜼𝜽𝜾𝜿𝝀𝝁𝝂𝝃𝝄𝝅𝝆𝝇𝝈𝝉𝝊𝝋𝝌𝝍𝝎/r; # Each listed ascii letter maps by position to a greek letter; /r returns the translated copy and unlisted characters pass through unchanged
1680             }
1681              
1682             sub asciiToSuffixLatin($) # Translate ascii to the corresponding letters in the suffix latin alphabet.
1683 0     0 1 0 {my ($in) = @_; # A string of ascii
1684 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯/r; # /r returns the translated copy and leaves $in alone; characters outside A-Z and a-z pass through unchanged
1685             }
1686              
1687             sub asciiToSuffixGreek($) # Translate ascii to the corresponding letters in the suffix greek alphabet.
1688 0     0 1 0 {my ($in) = @_; # A string of ascii
1689 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝞐𝞑𝞒𝞓𝞔𝞕𝞖𝞗𝞘𝞙𝞚𝞛𝞜𝞝𝞞𝞟𝞠𝞡𝞢𝞣𝞤𝞥𝞦𝞧𝞨𝞪𝞫𝞬𝞭𝞮𝞯𝞰𝞱𝞲𝞳𝞴𝞵𝞶𝞷𝞸𝞹𝞺𝞻𝞼𝞽𝞾𝞿𝟀𝟁𝟂/r; # Each listed ascii letter maps by position to a greek letter; /r returns the translated copy and unlisted characters pass through unchanged
1690             }
1691              
1692             sub asciiToVariableLatin($) # Translate ascii to the corresponding letters in the variable latin alphabet.
1693 0     0 1 0 {my ($in) = @_; # A string of ascii
1694 0         0 $in =~ tr/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇/r; # /r returns the translated copy and leaves $in alone; characters outside A-Z and a-z pass through unchanged
1695             }
1696              
1697             sub asciiToVariableGreek($) # Translate ascii to the corresponding letters in the variable greek alphabet.
1698 0     0 1 0 {my ($in) = @_; # A string of ascii
1699 0         0 $in =~ tr/ABGDEZNHIKLMVXOPRQSTUFCYWabgdeznhiklmvxoprqstufcyw/𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺𝝻𝝼𝝽𝝾𝝿𝞀𝞁𝞂𝞃𝞄𝞅𝞆𝞇𝞈/r; # Each listed ascii letter maps by position to a greek letter; /r returns the translated copy and unlisted characters pass through unchanged
1700             }
1701              
1702             sub asciiToEscaped($) # Translate ascii to the corresponding letters in the escaped ascii alphabet.
1703 0     0 1 0 {my ($in) = @_; # A string of ascii
1704 0         0 $in =~ tr/abcdefghijklmnopqrstuvwxyz/🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩/r; # Only lower case a-z are translated; /r returns the translated copy and every other character passes through unchanged
1705             }
1706              
1707             sub semiColonChar() # The character used as the semicolon: code point 10210, the single entry listed for the semiColon alphabet in lexicalData.
1708             {chr(10210)
1709             }
1710              
1711             #d
1712 1     1 0 2 sub lexicalData {do {
1713 1         318 my $a = bless({
1714             alphabetRanges => 14,
1715             alphabets => {
1716             "circledLatinLetter" => "\x{24B6}\x{24B7}\x{24B8}\x{24B9}\x{24BA}\x{24BB}\x{24BC}\x{24BD}\x{24BE}\x{24BF}\x{24C0}\x{24C1}\x{24C2}\x{24C3}\x{24C4}\x{24C5}\x{24C6}\x{24C7}\x{24C8}\x{24C9}\x{24CA}\x{24CB}\x{24CC}\x{24CD}\x{24CE}\x{24CF}\x{24D0}\x{24D1}\x{24D2}\x{24D3}\x{24D4}\x{24D5}\x{24D6}\x{24D7}\x{24D8}\x{24D9}\x{24DA}\x{24DB}\x{24DC}\x{24DD}\x{24DE}\x{24DF}\x{24E0}\x{24E1}\x{24E2}\x{24E3}\x{24E4}\x{24E5}\x{24E6}\x{24E7}\x{24E8}\x{24E9}",
1717             "mathematicalBold" => "\x{1D400}\x{1D401}\x{1D402}\x{1D403}\x{1D404}\x{1D405}\x{1D406}\x{1D407}\x{1D408}\x{1D409}\x{1D40A}\x{1D40B}\x{1D40C}\x{1D40D}\x{1D40E}\x{1D40F}\x{1D410}\x{1D411}\x{1D412}\x{1D413}\x{1D414}\x{1D415}\x{1D416}\x{1D417}\x{1D418}\x{1D419}\x{1D41A}\x{1D41B}\x{1D41C}\x{1D41D}\x{1D41E}\x{1D41F}\x{1D420}\x{1D421}\x{1D422}\x{1D423}\x{1D424}\x{1D425}\x{1D426}\x{1D427}\x{1D428}\x{1D429}\x{1D42A}\x{1D42B}\x{1D42C}\x{1D42D}\x{1D42E}\x{1D42F}\x{1D430}\x{1D431}\x{1D432}\x{1D433}\x{1D6A8}\x{1D6A9}\x{1D6AA}\x{1D6AB}\x{1D6AC}\x{1D6AD}\x{1D6AE}\x{1D6AF}\x{1D6B0}\x{1D6B1}\x{1D6B2}\x{1D6B3}\x{1D6B4}\x{1D6B5}\x{1D6B6}\x{1D6B7}\x{1D6B8}\x{1D6B9}\x{1D6BA}\x{1D6BB}\x{1D6BC}\x{1D6BD}\x{1D6BE}\x{1D6BF}\x{1D6C0}\x{1D6C1}\x{1D6C2}\x{1D6C3}\x{1D6C4}\x{1D6C5}\x{1D6C6}\x{1D6C7}\x{1D6C8}\x{1D6C9}\x{1D6CA}\x{1D6CB}\x{1D6CC}\x{1D6CD}\x{1D6CE}\x{1D6CF}\x{1D6D0}\x{1D6D1}\x{1D6D2}\x{1D6D3}\x{1D6D4}\x{1D6D5}\x{1D6D6}\x{1D6D7}\x{1D6D8}\x{1D6D9}\x{1D6DA}\x{1D6DB}\x{1D6DC}\x{1D6DD}\x{1D6DE}\x{1D6DF}\x{1D6E0}\x{1D6E1}",
1718             "mathematicalBoldFraktur" => "\x{1D56C}\x{1D56D}\x{1D56E}\x{1D56F}\x{1D570}\x{1D571}\x{1D572}\x{1D573}\x{1D574}\x{1D575}\x{1D576}\x{1D577}\x{1D578}\x{1D579}\x{1D57A}\x{1D57B}\x{1D57C}\x{1D57D}\x{1D57E}\x{1D57F}\x{1D580}\x{1D581}\x{1D582}\x{1D583}\x{1D584}\x{1D585}\x{1D586}\x{1D587}\x{1D588}\x{1D589}\x{1D58A}\x{1D58B}\x{1D58C}\x{1D58D}\x{1D58E}\x{1D58F}\x{1D590}\x{1D591}\x{1D592}\x{1D593}\x{1D594}\x{1D595}\x{1D596}\x{1D597}\x{1D598}\x{1D599}\x{1D59A}\x{1D59B}\x{1D59C}\x{1D59D}\x{1D59E}\x{1D59F}",
1719             "mathematicalBoldItalic" => "\x{1D468}\x{1D469}\x{1D46A}\x{1D46B}\x{1D46C}\x{1D46D}\x{1D46E}\x{1D46F}\x{1D470}\x{1D471}\x{1D472}\x{1D473}\x{1D474}\x{1D475}\x{1D476}\x{1D477}\x{1D478}\x{1D479}\x{1D47A}\x{1D47B}\x{1D47C}\x{1D47D}\x{1D47E}\x{1D47F}\x{1D480}\x{1D481}\x{1D482}\x{1D483}\x{1D484}\x{1D485}\x{1D486}\x{1D487}\x{1D488}\x{1D489}\x{1D48A}\x{1D48B}\x{1D48C}\x{1D48D}\x{1D48E}\x{1D48F}\x{1D490}\x{1D491}\x{1D492}\x{1D493}\x{1D494}\x{1D495}\x{1D496}\x{1D497}\x{1D498}\x{1D499}\x{1D49A}\x{1D49B}\x{1D71C}\x{1D71D}\x{1D71E}\x{1D71F}\x{1D720}\x{1D721}\x{1D722}\x{1D723}\x{1D724}\x{1D725}\x{1D726}\x{1D727}\x{1D728}\x{1D729}\x{1D72A}\x{1D72B}\x{1D72C}\x{1D72D}\x{1D72E}\x{1D72F}\x{1D730}\x{1D731}\x{1D732}\x{1D733}\x{1D734}\x{1D735}\x{1D736}\x{1D737}\x{1D738}\x{1D739}\x{1D73A}\x{1D73B}\x{1D73C}\x{1D73D}\x{1D73E}\x{1D73F}\x{1D740}\x{1D741}\x{1D742}\x{1D743}\x{1D744}\x{1D745}\x{1D746}\x{1D747}\x{1D748}\x{1D749}\x{1D74A}\x{1D74B}\x{1D74C}\x{1D74D}\x{1D74E}\x{1D74F}\x{1D750}\x{1D751}\x{1D752}\x{1D753}\x{1D754}\x{1D755}",
1720             "mathematicalBoldScript" => "\x{1D4D0}\x{1D4D1}\x{1D4D2}\x{1D4D3}\x{1D4D4}\x{1D4D5}\x{1D4D6}\x{1D4D7}\x{1D4D8}\x{1D4D9}\x{1D4DA}\x{1D4DB}\x{1D4DC}\x{1D4DD}\x{1D4DE}\x{1D4DF}\x{1D4E0}\x{1D4E1}\x{1D4E2}\x{1D4E3}\x{1D4E4}\x{1D4E5}\x{1D4E6}\x{1D4E7}\x{1D4E8}\x{1D4E9}\x{1D4EA}\x{1D4EB}\x{1D4EC}\x{1D4ED}\x{1D4EE}\x{1D4EF}\x{1D4F0}\x{1D4F1}\x{1D4F2}\x{1D4F3}\x{1D4F4}\x{1D4F5}\x{1D4F6}\x{1D4F7}\x{1D4F8}\x{1D4F9}\x{1D4FA}\x{1D4FB}\x{1D4FC}\x{1D4FD}\x{1D4FE}\x{1D4FF}\x{1D500}\x{1D501}\x{1D502}\x{1D503}",
1721             "mathematicalDouble-struck" => "\x{1D538}\x{1D539}\x{1D53B}\x{1D53C}\x{1D53D}\x{1D53E}\x{1D540}\x{1D541}\x{1D542}\x{1D543}\x{1D544}\x{1D546}\x{1D54A}\x{1D54B}\x{1D54C}\x{1D54D}\x{1D54E}\x{1D54F}\x{1D550}\x{1D552}\x{1D553}\x{1D554}\x{1D555}\x{1D556}\x{1D557}\x{1D558}\x{1D559}\x{1D55A}\x{1D55B}\x{1D55C}\x{1D55D}\x{1D55E}\x{1D55F}\x{1D560}\x{1D561}\x{1D562}\x{1D563}\x{1D564}\x{1D565}\x{1D566}\x{1D567}\x{1D568}\x{1D569}\x{1D56A}\x{1D56B}",
1722             "mathematicalFraktur" => "\x{1D504}\x{1D505}\x{1D507}\x{1D508}\x{1D509}\x{1D50A}\x{1D50D}\x{1D50E}\x{1D50F}\x{1D510}\x{1D511}\x{1D512}\x{1D513}\x{1D514}\x{1D516}\x{1D517}\x{1D518}\x{1D519}\x{1D51A}\x{1D51B}\x{1D51C}\x{1D51E}\x{1D51F}\x{1D520}\x{1D521}\x{1D522}\x{1D523}\x{1D524}\x{1D525}\x{1D526}\x{1D527}\x{1D528}\x{1D529}\x{1D52A}\x{1D52B}\x{1D52C}\x{1D52D}\x{1D52E}\x{1D52F}\x{1D530}\x{1D531}\x{1D532}\x{1D533}\x{1D534}\x{1D535}\x{1D536}\x{1D537}",
1723             "mathematicalItalic" => "\x{1D434}\x{1D435}\x{1D436}\x{1D437}\x{1D438}\x{1D439}\x{1D43A}\x{1D43B}\x{1D43C}\x{1D43D}\x{1D43E}\x{1D43F}\x{1D440}\x{1D441}\x{1D442}\x{1D443}\x{1D444}\x{1D445}\x{1D446}\x{1D447}\x{1D448}\x{1D449}\x{1D44A}\x{1D44B}\x{1D44C}\x{1D44D}\x{1D44E}\x{1D44F}\x{1D450}\x{1D451}\x{1D452}\x{1D453}\x{1D454}\x{1D456}\x{1D457}\x{1D458}\x{1D459}\x{1D45A}\x{1D45B}\x{1D45C}\x{1D45D}\x{1D45E}\x{1D45F}\x{1D460}\x{1D461}\x{1D462}\x{1D463}\x{1D464}\x{1D465}\x{1D466}\x{1D467}\x{1D6E2}\x{1D6E3}\x{1D6E4}\x{1D6E5}\x{1D6E6}\x{1D6E7}\x{1D6E8}\x{1D6E9}\x{1D6EA}\x{1D6EB}\x{1D6EC}\x{1D6ED}\x{1D6EE}\x{1D6EF}\x{1D6F0}\x{1D6F1}\x{1D6F2}\x{1D6F3}\x{1D6F4}\x{1D6F5}\x{1D6F6}\x{1D6F7}\x{1D6F8}\x{1D6F9}\x{1D6FA}\x{1D6FB}\x{1D6FC}\x{1D6FD}\x{1D6FE}\x{1D6FF}\x{1D700}\x{1D701}\x{1D702}\x{1D703}\x{1D704}\x{1D705}\x{1D706}\x{1D707}\x{1D708}\x{1D709}\x{1D70A}\x{1D70B}\x{1D70C}\x{1D70D}\x{1D70E}\x{1D70F}\x{1D710}\x{1D711}\x{1D712}\x{1D713}\x{1D714}\x{1D715}\x{1D716}\x{1D717}\x{1D718}\x{1D719}\x{1D71A}\x{1D71B}",
1724             "mathematicalMonospace" => "\x{1D670}\x{1D671}\x{1D672}\x{1D673}\x{1D674}\x{1D675}\x{1D676}\x{1D677}\x{1D678}\x{1D679}\x{1D67A}\x{1D67B}\x{1D67C}\x{1D67D}\x{1D67E}\x{1D67F}\x{1D680}\x{1D681}\x{1D682}\x{1D683}\x{1D684}\x{1D685}\x{1D686}\x{1D687}\x{1D688}\x{1D689}\x{1D68A}\x{1D68B}\x{1D68C}\x{1D68D}\x{1D68E}\x{1D68F}\x{1D690}\x{1D691}\x{1D692}\x{1D693}\x{1D694}\x{1D695}\x{1D696}\x{1D697}\x{1D698}\x{1D699}\x{1D69A}\x{1D69B}\x{1D69C}\x{1D69D}\x{1D69E}\x{1D69F}\x{1D6A0}\x{1D6A1}\x{1D6A2}\x{1D6A3}",
1725             "mathematicalSans-serif" => "\x{1D5A0}\x{1D5A1}\x{1D5A2}\x{1D5A3}\x{1D5A4}\x{1D5A5}\x{1D5A6}\x{1D5A7}\x{1D5A8}\x{1D5A9}\x{1D5AA}\x{1D5AB}\x{1D5AC}\x{1D5AD}\x{1D5AE}\x{1D5AF}\x{1D5B0}\x{1D5B1}\x{1D5B2}\x{1D5B3}\x{1D5B4}\x{1D5B5}\x{1D5B6}\x{1D5B7}\x{1D5B8}\x{1D5B9}\x{1D5BA}\x{1D5BB}\x{1D5BC}\x{1D5BD}\x{1D5BE}\x{1D5BF}\x{1D5C0}\x{1D5C1}\x{1D5C2}\x{1D5C3}\x{1D5C4}\x{1D5C5}\x{1D5C6}\x{1D5C7}\x{1D5C8}\x{1D5C9}\x{1D5CA}\x{1D5CB}\x{1D5CC}\x{1D5CD}\x{1D5CE}\x{1D5CF}\x{1D5D0}\x{1D5D1}\x{1D5D2}\x{1D5D3}",
1726             "mathematicalSans-serifBold" => "\x{1D5D4}\x{1D5D5}\x{1D5D6}\x{1D5D7}\x{1D5D8}\x{1D5D9}\x{1D5DA}\x{1D5DB}\x{1D5DC}\x{1D5DD}\x{1D5DE}\x{1D5DF}\x{1D5E0}\x{1D5E1}\x{1D5E2}\x{1D5E3}\x{1D5E4}\x{1D5E5}\x{1D5E6}\x{1D5E7}\x{1D5E8}\x{1D5E9}\x{1D5EA}\x{1D5EB}\x{1D5EC}\x{1D5ED}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{1D5F1}\x{1D5F2}\x{1D5F3}\x{1D5F4}\x{1D5F5}\x{1D5F6}\x{1D5F7}\x{1D5F8}\x{1D5F9}\x{1D5FA}\x{1D5FB}\x{1D5FC}\x{1D5FD}\x{1D5FE}\x{1D5FF}\x{1D600}\x{1D601}\x{1D602}\x{1D603}\x{1D604}\x{1D605}\x{1D606}\x{1D607}\x{1D756}\x{1D757}\x{1D758}\x{1D759}\x{1D75A}\x{1D75B}\x{1D75C}\x{1D75D}\x{1D75E}\x{1D75F}\x{1D760}\x{1D761}\x{1D762}\x{1D763}\x{1D764}\x{1D765}\x{1D766}\x{1D767}\x{1D768}\x{1D769}\x{1D76A}\x{1D76B}\x{1D76C}\x{1D76D}\x{1D76E}\x{1D76F}\x{1D770}\x{1D771}\x{1D772}\x{1D773}\x{1D774}\x{1D775}\x{1D776}\x{1D777}\x{1D778}\x{1D779}\x{1D77A}\x{1D77B}\x{1D77C}\x{1D77D}\x{1D77E}\x{1D77F}\x{1D780}\x{1D781}\x{1D782}\x{1D783}\x{1D784}\x{1D785}\x{1D786}\x{1D787}\x{1D788}\x{1D789}\x{1D78A}\x{1D78B}\x{1D78C}\x{1D78D}\x{1D78E}\x{1D78F}",
1727             "mathematicalSans-serifBoldItalic" => "\x{1D63C}\x{1D63D}\x{1D63E}\x{1D63F}\x{1D640}\x{1D641}\x{1D642}\x{1D643}\x{1D644}\x{1D645}\x{1D646}\x{1D647}\x{1D648}\x{1D649}\x{1D64A}\x{1D64B}\x{1D64C}\x{1D64D}\x{1D64E}\x{1D64F}\x{1D650}\x{1D651}\x{1D652}\x{1D653}\x{1D654}\x{1D655}\x{1D656}\x{1D657}\x{1D658}\x{1D659}\x{1D65A}\x{1D65B}\x{1D65C}\x{1D65D}\x{1D65E}\x{1D65F}\x{1D660}\x{1D661}\x{1D662}\x{1D663}\x{1D664}\x{1D665}\x{1D666}\x{1D667}\x{1D668}\x{1D669}\x{1D66A}\x{1D66B}\x{1D66C}\x{1D66D}\x{1D66E}\x{1D66F}\x{1D790}\x{1D791}\x{1D792}\x{1D793}\x{1D794}\x{1D795}\x{1D796}\x{1D797}\x{1D798}\x{1D799}\x{1D79A}\x{1D79B}\x{1D79C}\x{1D79D}\x{1D79E}\x{1D79F}\x{1D7A0}\x{1D7A1}\x{1D7A2}\x{1D7A3}\x{1D7A4}\x{1D7A5}\x{1D7A6}\x{1D7A7}\x{1D7A8}\x{1D7A9}\x{1D7AA}\x{1D7AB}\x{1D7AC}\x{1D7AD}\x{1D7AE}\x{1D7AF}\x{1D7B0}\x{1D7B1}\x{1D7B2}\x{1D7B3}\x{1D7B4}\x{1D7B5}\x{1D7B6}\x{1D7B7}\x{1D7B8}\x{1D7B9}\x{1D7BA}\x{1D7BB}\x{1D7BC}\x{1D7BD}\x{1D7BE}\x{1D7BF}\x{1D7C0}\x{1D7C1}\x{1D7C2}\x{1D7C3}\x{1D7C4}\x{1D7C5}\x{1D7C6}\x{1D7C7}\x{1D7C8}\x{1D7C9}",
1728             "mathematicalSans-serifItalic" => "\x{1D608}\x{1D609}\x{1D60A}\x{1D60B}\x{1D60C}\x{1D60D}\x{1D60E}\x{1D60F}\x{1D610}\x{1D611}\x{1D612}\x{1D613}\x{1D614}\x{1D615}\x{1D616}\x{1D617}\x{1D618}\x{1D619}\x{1D61A}\x{1D61B}\x{1D61C}\x{1D61D}\x{1D61E}\x{1D61F}\x{1D620}\x{1D621}\x{1D622}\x{1D623}\x{1D624}\x{1D625}\x{1D626}\x{1D627}\x{1D628}\x{1D629}\x{1D62A}\x{1D62B}\x{1D62C}\x{1D62D}\x{1D62E}\x{1D62F}\x{1D630}\x{1D631}\x{1D632}\x{1D633}\x{1D634}\x{1D635}\x{1D636}\x{1D637}\x{1D638}\x{1D639}\x{1D63A}\x{1D63B}",
1729             "mathematicalScript" => "\x{1D49C}\x{1D49E}\x{1D49F}\x{1D4A2}\x{1D4A5}\x{1D4A6}\x{1D4A9}\x{1D4AA}\x{1D4AB}\x{1D4AC}\x{1D4AE}\x{1D4AF}\x{1D4B0}\x{1D4B1}\x{1D4B2}\x{1D4B3}\x{1D4B4}\x{1D4B5}\x{1D4B6}\x{1D4B7}\x{1D4B8}\x{1D4B9}\x{1D4BB}\x{1D4BD}\x{1D4BE}\x{1D4BF}\x{1D4C0}\x{1D4C1}\x{1D4C2}\x{1D4C3}\x{1D4C5}\x{1D4C6}\x{1D4C7}\x{1D4C8}\x{1D4C9}\x{1D4CA}\x{1D4CB}\x{1D4CC}\x{1D4CD}\x{1D4CE}\x{1D4CF}",
1730             "negativeCircledLatinLetter" => "\x{1F150}\x{1F151}\x{1F152}\x{1F153}\x{1F154}\x{1F155}\x{1F156}\x{1F157}\x{1F158}\x{1F159}\x{1F15A}\x{1F15B}\x{1F15C}\x{1F15D}\x{1F15E}\x{1F15F}\x{1F160}\x{1F161}\x{1F162}\x{1F163}\x{1F164}\x{1F165}\x{1F166}\x{1F167}\x{1F168}\x{1F169}",
1731             "negativeSquaredLatinLetter" => "\x{1F170}\x{1F171}\x{1F172}\x{1F173}\x{1F174}\x{1F175}\x{1F176}\x{1F177}\x{1F178}\x{1F179}\x{1F17A}\x{1F17B}\x{1F17C}\x{1F17D}\x{1F17E}\x{1F17F}\x{1F180}\x{1F181}\x{1F182}\x{1F183}\x{1F184}\x{1F185}\x{1F186}\x{1F187}\x{1F188}\x{1F189}",
1732             "planck" => "\x{210E}",
1733             "semiColon" => "\x{27E2}",
1734             "squaredLatinLetter" => "\x{1F130}\x{1F131}\x{1F132}\x{1F133}\x{1F134}\x{1F135}\x{1F136}\x{1F137}\x{1F138}\x{1F139}\x{1F13A}\x{1F13B}\x{1F13C}\x{1F13D}\x{1F13E}\x{1F13F}\x{1F140}\x{1F141}\x{1F142}\x{1F143}\x{1F144}\x{1F145}\x{1F146}\x{1F147}\x{1F148}\x{1F149}\x{1F1A5}",
1735             },
1736             alphabetsOrdered => {
1737             Ascii => [0 .. 127, 127312 .. 127337],
1738             assign => [8462, 119860 .. 119911, 120546 .. 120603],
1739             dyad => [119808 .. 119859, 120488 .. 120545],
1740             prefix => [119912 .. 119963, 120604 .. 120661],
1741             semiColon => [10210],
1742             suffix => [120380 .. 120431, 120720 .. 120777],
1743             variable => [120276 .. 120327, 120662 .. 120719],
1744             },
1745             brackets => 16,
1746             bracketsBase => 16,
1747             bracketsClose => [
1748             "\x{2309}",
1749             "\x{230B}",
1750             "\x{232A}",
1751             "\x{2769}",
1752             "\x{276B}",
1753             "\x{276D}",
1754             "\x{276F}",
1755             "\x{2771}",
1756             "\x{2773}",
1757             "\x{2775}",
1758             "\x{27E7}",
1759             "\x{27E9}",
1760             "\x{27EB}",
1761             "\x{27ED}",
1762             "\x{27EF}",
1763             "\x{2984}",
1764             "\x{2986}",
1765             "\x{2988}",
1766             "\x{298A}",
1767             "\x{298C}",
1768             "\x{298E}",
1769             "\x{2990}",
1770             "\x{2992}",
1771             "\x{2994}",
1772             "\x{2996}",
1773             "\x{2998}",
1774             "\x{29FD}",
1775             "\x{2E29}",
1776             "\x{3009}",
1777             "\x{300B}",
1778             "\x{3011}",
1779             "\x{3015}",
1780             "\x{3017}",
1781             "\x{3019}",
1782             "\x{301B}",
1783             "\x{FD3F}",
1784             "\x{FF09}",
1785             "\x{FF60}",
1786             ],
1787             bracketsHigh => [
1788             "0x1300230b",
1789             "0x1500232a",
1790             "0x23002775",
1791             "0x2d0027ef",
1792             "0x43002998",
1793             "0x450029fd",
1794             "0x47002e29",
1795             "0x4b00300b",
1796             "0x4d003011",
1797             "0x5500301b",
1798             "0x5700fd3f",
1799             "0x5900ff09",
1800             "0x5b00ff60",
1801             0,
1802             0,
1803             0,
1804             ],
1805             bracketsLow => [
1806             "0x10002308",
1807             "0x14002329",
1808             "0x16002768",
1809             "0x240027e6",
1810             "0x2e002983",
1811             "0x440029fc",
1812             "0x46002e28",
1813             "0x48003008",
1814             "0x4c003010",
1815             "0x4e003014",
1816             "0x5600fd3e",
1817             "0x5800ff08",
1818             "0x5a00ff5f",
1819             0,
1820             0,
1821             0,
1822             ],
1823             bracketsOpen => [
1824             "\x{2308}",
1825             "\x{230A}",
1826             "\x{2329}",
1827             "\x{2768}",
1828             "\x{276A}",
1829             "\x{276C}",
1830             "\x{276E}",
1831             "\x{2770}",
1832             "\x{2772}",
1833             "\x{2774}",
1834             "\x{27E6}",
1835             "\x{27E8}",
1836             "\x{27EA}",
1837             "\x{27EC}",
1838             "\x{27EE}",
1839             "\x{2983}",
1840             "\x{2985}",
1841             "\x{2987}",
1842             "\x{2989}",
1843             "\x{298B}",
1844             "\x{298D}",
1845             "\x{298F}",
1846             "\x{2991}",
1847             "\x{2993}",
1848             "\x{2995}",
1849             "\x{2997}",
1850             "\x{29FC}",
1851             "\x{2E28}",
1852             "\x{3008}",
1853             "\x{300A}",
1854             "\x{3010}",
1855             "\x{3014}",
1856             "\x{3016}",
1857             "\x{3018}",
1858             "\x{301A}",
1859             "\x{FD3E}",
1860             "\x{FF08}",
1861             "\x{FF5F}",
1862             ],
1863             lexicalAlpha => {
1864             "" => [
1865             "circledLatinLetter",
1866             "mathematicalBoldFraktur",
1867             "mathematicalBoldScript",
1868             "mathematicalDouble-struck",
1869             "mathematicalFraktur",
1870             "mathematicalMonospace",
1871             "mathematicalSans-serif",
1872             "mathematicalSans-serifItalic",
1873             "mathematicalScript",
1874             "negativeSquaredLatinLetter",
1875             "semiColon",
1876             "squaredLatinLetter",
1877             ],
1878             "Ascii" => ["negativeCircledLatinLetter"],
1879             "assign" => ["mathematicalItalic", "planck"],
1880             "CloseBracket" => [],
1881             "dyad" => ["mathematicalBold"],
1882             "OpenBracket" => [],
1883             "prefix" => ["mathematicalBoldItalic"],
1884             "semiColon" => [],
1885             "suffix" => ["mathematicalSans-serifBoldItalic"],
1886             "term" => [],
1887             "variable" => ["mathematicalSans-serifBold"],
1888             },
1889             lexicalHigh => [
1890             127,
1891             8462,
1892             10210,
1893             119859,
1894             16897127,
1895             119963,
1896             120327,
1897             120431,
1898             872535777,
1899             889313051,
1900             872535893,
1901             872535951,
1902             872536009,
1903             2147610985,
1904             0,
1905             0,
1906             ],
1907             lexicalLow => [
1908             33554432,
1909             83894542,
1910             134227938,
1911             50451456,
1912             84005940,
1913             67228776,
1914             100783572,
1915             117560892,
1916             50452136,
1917             84006626,
1918             67229468,
1919             100783958,
1920             117561232,
1921             33681744,
1922             0,
1923             0,
1924             ],
1925             lexicals => bless({
1926             Ascii => bless({ letter => "a", like => "v", name => "Ascii", number => 2 }, "Unisyn::Parse::Lexical::Constant"),
1927             assign => bless({ letter => "a", like => "a", name => "assign", number => 5 }, "Unisyn::Parse::Lexical::Constant"),
1928             CloseBracket => bless({ letter => "B", like => "B", name => "CloseBracket", number => 1 }, "Unisyn::Parse::Lexical::Constant"),
1929             dyad => bless({ letter => "d", like => "d", name => "dyad", number => 3 }, "Unisyn::Parse::Lexical::Constant"),
1930             empty => bless({ letter => "e", like => "e", name => "empty", number => 10 }, "Unisyn::Parse::Lexical::Constant"),
1931             NewLineSemiColon => bless({ letter => "N", like => undef, name => "NewLineSemiColon", number => 12 }, "Unisyn::Parse::Lexical::Constant"),
1932             OpenBracket => bless({ letter => "b", like => "b", name => "OpenBracket", number => 0 }, "Unisyn::Parse::Lexical::Constant"),
1933             prefix => bless({ letter => "p", like => "p", name => "prefix", number => 4 }, "Unisyn::Parse::Lexical::Constant"),
1934             semiColon => bless({ letter => "s", like => "s", name => "semiColon", number => 8 }, "Unisyn::Parse::Lexical::Constant"),
1935             suffix => bless({ letter => "q", like => "q", name => "suffix", number => 7 }, "Unisyn::Parse::Lexical::Constant"),
1936             term => bless({ letter => "t", like => "t", name => "term", number => 9 }, "Unisyn::Parse::Lexical::Constant"),
1937             variable => bless({ letter => "v", like => "v", name => "variable", number => 6 }, "Unisyn::Parse::Lexical::Constant"),
1938             WhiteSpace => bless({ letter => "W", like => undef, name => "WhiteSpace", number => 11 }, "Unisyn::Parse::Lexical::Constant"),
1939             }, "Unisyn::Parse::Lexicals"),
1940             sampleLexicals => {
1941             A => [
1942             100663296,
1943             83886080,
1944             33554497,
1945             33554464,
1946             33554497,
1947             33554464,
1948             33554464,
1949             33554464,
1950             33554464,
1951             ],
1952             Adv => [
1953             100663296,
1954             83886080,
1955             33554497,
1956             33554464,
1957             33554497,
1958             33554464,
1959             33554464,
1960             33554464,
1961             33554464,
1962             50331648,
1963             100663296,
1964             ],
1965             BB => [
1966             0,
1967             0,
1968             0,
1969             0,
1970             0,
1971             0,
1972             0,
1973             0,
1974             100663296,
1975             16777216,
1976             16777216,
1977             16777216,
1978             16777216,
1979             16777216,
1980             16777216,
1981             16777216,
1982             16777216,
1983             ],
1984             brackets => [
1985             100663296,
1986             83886080,
1987             0,
1988             0,
1989             0,
1990             100663296,
1991             16777216,
1992             16777216,
1993             50331648,
1994             0,
1995             100663296,
1996             16777216,
1997             16777216,
1998             134217728,
1999             ],
2000             bvB => [0, 100663296, 16777216],
2001             nosemi => [
2002             100663296,
2003             83886080,
2004             0,
2005             0,
2006             0,
2007             100663296,
2008             16777216,
2009             16777216,
2010             50331648,
2011             0,
2012             100663296,
2013             16777216,
2014             16777216,
2015             ],
2016             ppppvdvdvqqqq => [
2017             0,
2018             0,
2019             0,
2020             100663296,
2021             83886080,
2022             100663296,
2023             50331648,
2024             0,
2025             100663296,
2026             50331648,
2027             100663296,
2028             16777216,
2029             134217728,
2030             100663296,
2031             83886080,
2032             100663296,
2033             50331648,
2034             100663296,
2035             16777216,
2036             16777216,
2037             16777216,
2038             ],
2039             s => [100663296, 134217728, 100663296],
2040             s1 => [
2041             100663296,
2042             83886080,
2043             33554442,
2044             33554464,
2045             33554464,
2046             33554497,
2047             33554442,
2048             33554464,
2049             33554464,
2050             33554464,
2051             ],
2052             v => [100663296],
2053             vav => [100663296, 83886080, 100663296],
2054             vavav => [100663296, 83886080, 100663296, 83886080, 100663296],
2055             vnsvs => [
2056             100663296,
2057             33554442,
2058             33554464,
2059             33554464,
2060             33554464,
2061             100663296,
2062             33554464,
2063             33554464,
2064             33554464,
2065             ],
2066             vnv => [100663296, 33554442, 100663296],
2067             vnvs => [
2068             100663296,
2069             33554442,
2070             100663296,
2071             33554464,
2072             33554464,
2073             33554464,
2074             33554464,
2075             ],
2076             ws => [
2077             100663296,
2078             83886080,
2079             0,
2080             0,
2081             0,
2082             100663296,
2083             16777216,
2084             16777216,
2085             50331648,
2086             0,
2087             100663296,
2088             16777216,
2089             16777216,
2090             134217728,
2091             100663296,
2092             83886080,
2093             0,
2094             100663296,
2095             50331648,
2096             100663296,
2097             16777216,
2098             134217728,
2099             ],
2100             wsa => [
2101             100663296,
2102             83886080,
2103             0,
2104             0,
2105             0,
2106             100663296,
2107             16777216,
2108             16777216,
2109             50331648,
2110             0,
2111             100663296,
2112             16777216,
2113             16777216,
2114             134217728,
2115             100663296,
2116             83886080,
2117             33554497,
2118             50331648,
2119             100663296,
2120             134217728,
2121             ],
2122             },
2123             sampleText => {
2124             A => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 ",
2125             Adv => "\x{1D5EE}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}abc 123 \x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D603}\x{1D5EE}\x{1D5FF}",
2126             BB => "\x{230A}\x{2329}\x{2768}\x{276A}\x{276C}\x{276E}\x{2770}\x{2772}\x{1D5EE}\x{2773}\x{2771}\x{276F}\x{276D}\x{276B}\x{2769}\x{232A}\x{230B}",
2127             brackets => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}",
2128             bvB => "\x{2329}\x{1D5EE}\x{1D5EF}\x{1D5F0}\x{232A}",
2129             nosemi => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}",
2130             ppppvdvdvqqqq => "\x{1D482}\x{2774}\x{1D483}\x{27E6}\x{1D484}\x{27E8}\x{1D5EE}\x{1D452}\x{1D45E}\x{1D462}\x{1D44E}\x{1D459}\x{1D460}\x{1D485}\x{1D5EF}\x{1D659}\x{1D42D}\x{1D422}\x{1D426}\x{1D41E}\x{1D42C}\x{27EA}\x{1D5F0}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F1}\x{27EB}\x{27E2}\x{1D5F2}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{1D5F3}\x{1D42C}\x{1D42E}\x{1D41B}\x{1D5F4}\x{1D65D}\x{27E9}\x{1D658}\x{27E7}\x{1D657}\x{2775}\x{1D656}",
2131             s => "\x{1D5EE}\x{27E2}\x{1D5EF}",
2132             s1 => "\x{1D5EE}\x{1D44E}\n \n ",
2133             v => "\x{1D5EE}",
2134             vav => "\x{1D5EE}\x{1D44E}\x{1D5EF}",
2135             vavav => "\x{1D5EE}\x{1D44E}\x{1D5EF}\x{1D44E}\x{1D5F0}",
2136             vnsvs => "\x{1D5EE}\x{1D5EE}\n \x{1D5EF}\x{1D5EF} ",
2137             vnv => "\x{1D5EE}\n\x{1D5EF}",
2138             vnvs => "\x{1D5EE}\n\x{1D5EF} ",
2139             ws => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{276C}\x{1D5EF}\x{1D5EF}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{276D}\x{27E2}",
2140             wsa => "\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}\x{230A}\x{2329}\x{2768}\x{1D5EF}\x{1D5FD}\x{2769}\x{232A}\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{276A}\x{1D600}\x{1D5F0}\x{276B}\x{230B}\x{27E2}\x{1D5EE}\x{1D5EE}\x{1D44E}\x{1D460}\x{1D460}\x{1D456}\x{1D454}\x{1D45B}some--ascii--text\x{1D429}\x{1D425}\x{1D42E}\x{1D42C}\x{1D5F0}\x{1D5F0}\x{27E2}",
2141             },
2142             semiColon => "\x{27E2}",
2143             separator => "\x{205F}",
2144             structure => bless({
2145             codes => bless({
2146             a => bless({
2147             letter => "a",
2148             name => "assignment operator",
2149             next => "bpv",
2150             short => "assign",
2151             }, "Tree::Term::LexicalCode"),
2152             b => bless({
2153             letter => "b",
2154             name => "opening parenthesis",
2155             next => "bBpsv",
2156             short => "OpenBracket",
2157             }, "Tree::Term::LexicalCode"),
2158             B => bless({
2159             letter => "B",
2160             name => "closing parenthesis",
2161             next => "aBdqs",
2162             short => "CloseBracket",
2163             }, "Tree::Term::LexicalCode"),
2164             d => bless({ letter => "d", name => "dyadic operator", next => "bpv", short => "dyad" }, "Tree::Term::LexicalCode"),
2165             p => bless({ letter => "p", name => "prefix operator", next => "bpv", short => "prefix" }, "Tree::Term::LexicalCode"),
2166             q => bless({
2167             letter => "q",
2168             name => "suffix operator",
2169             next => "aBdqs",
2170             short => "suffix",
2171             }, "Tree::Term::LexicalCode"),
2172             s => bless({ letter => "s", name => "semi-colon", next => "bBpsv", short => "semiColon" }, "Tree::Term::LexicalCode"),
2173             t => bless({ letter => "t", name => "term", next => "aBdqs", short => "term" }, "Tree::Term::LexicalCode"),
2174             v => bless({ letter => "v", name => "variable", next => "aBdqs", short => "variable" }, "Tree::Term::LexicalCode"),
2175             }, "Tree::Term::Codes"),
2176             first => "bpsv",
2177             last => "Bqsv",
2178             }, "Tree::Term::LexicalStructure"),
2179             treeTermLexicals => 'fix',
2180             }, "Unisyn::Parse::Lexical::Tables");
2181 1         7 $a->{treeTermLexicals} = $a->{structure}{codes};
2182 1         3 $a;
2183             }}
2184              
2185             #-------------------------------------------------------------------------------
2186             # Export - eeee
2187             #-------------------------------------------------------------------------------
2188              
2189 1     1   11519 use Exporter qw(import);
  1         2  
  1         89  
2190              
2191 1     1   7 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         1  
  1         533  
2192              
2193             @ISA = qw(Exporter); # Inherit from Exporter
2194             @EXPORT = qw(); # Nothing is exported by default
2195             @EXPORT_OK = qw(); # Nothing can be exported on request either
2196             %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); # The :all tag is the union of both lists, which are both empty here
2197              
2198             # podDocumentation
2199             =pod
2200              
2201             =encoding utf-8
2202              
2203             =head1 Name
2204              
2205             Unisyn::Parse - Parse a Unisyn expression.
2206              
2207             =head1 Synopsis
2208              
2209             Parse the B<Unisyn> expression:
2210              
2211             𝒂 ❴ 𝒃 ⟦𝒄⟨ 𝗮 𝑒𝑞𝑢𝑎𝑙𝑠 𝒅 𝗯 𝙙 𝐭𝐢𝐦𝐞𝐬 ⟪𝗰 𝐩𝐥𝐮𝐬 𝗱⟫⟢ 𝗲 𝑎𝑠𝑠𝑖𝑔𝑛 𝗳 𝐬𝐮𝐛 𝗴 𝙝⟩ 𝙘 ⟧ 𝙗 ❵ 𝙖
2212              
2213             To get:
2214              
2215             Suffix: 𝙖
2216             Term
2217             Prefix: 𝒂
2218             Term
2219             Brackets: ⦇⦈
2220             Term
2221             Term
2222             Suffix: 𝙗
2223             Term
2224             Prefix: 𝒃
2225             Term
2226             Brackets: ⦋⦌
2227             Term
2228             Term
2229             Suffix: 𝙘
2230             Term
2231             Prefix: 𝒄
2232             Term
2233             Brackets: ⦏⦐
2234             Term
2235             Term
2236             Semicolon
2237             Term
2238             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2239             Term
2240             Variable: 𝗮
2241             Term
2242             Dyad: 𝐭𝐢𝐦𝐞𝐬
2243             Term
2244             Suffix: 𝙙
2245             Term
2246             Prefix: 𝒅
2247             Term
2248             Variable: 𝗯
2249             Term
2250             Brackets: ⦓⦔
2251             Term
2252             Term
2253             Dyad: 𝐩𝐥𝐮𝐬
2254             Term
2255             Variable: 𝗰
2256             Term
2257             Variable: 𝗱
2258             Term
2259             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2260             Term
2261             Variable: 𝗲
2262             Term
2263             Dyad: 𝐬𝐮𝐛
2264             Term
2265             Variable: 𝗳
2266             Term
2267             Suffix: 𝙝
2268             Term
2269             Variable: 𝗴
2270              
2271             Then traverse the parse tree printing the type of each node:
2272              
2273             variable
2274             variable
2275             prefix_d
2276             suffix_d
2277             variable
2278             variable
2279             plus
2280             times
2281             equals
2282             variable
2283             variable
2284             variable
2285             sub
2286             assign
2287             semiColon
2288             brackets_3
2289             prefix_c
2290             suffix_c
2291             brackets_2
2292             prefix_b
2293             suffix_b
2294             brackets_1
2295             prefix_a
2296             suffix_a
2297              
2298             =head1 Description
2299              
2300             Parse a Unisyn expression.
2301              
2302              
2303             Version "20211008".
2304              
2305              
2306             The following sections describe the methods in each functional area of this
2307             module. For an alphabetic listing of all methods by name see L<Index|/Index>.
2308              
2309              
2310              
2311             =head1 Create
2312              
2313             Create a Unisyn parse of a utf8 string.
2314              
2315             =head2 create($address, %options)
2316              
2317             Create a new unisyn parse from a utf8 string.
2318              
2319             Parameter Description
2320             1 $address Address of a zero terminated utf8 source string to parse as a variable
2321             2 %options Parse options.
2322              
2323             B<Example:>
2324              
2325              
2326            
2327             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2328              
2329            
2330             ok Assemble(debug => 0, eq => <<END);
2331             Assign: 𝑎
2332             Term
2333             Variable: 𝗮
2334             Term
2335             Variable: 𝗯
2336             END
2337            
2338              
2339             =head1 Parse
2340              
2341             Parse Unisyn expressions
2342              
2343             =head1 Traverse
2344              
2345             Traverse the parse tree
2346              
2347             =head2 traverseParseTree($parse)
2348              
2349             Traverse the terms in the parse tree in post order and call the operator subroutine associated with each term.
2350              
2351             Parameter Description
2352             1 $parse Parse tree
2353              
2354             B<Example:>
2355              
2356              
2357             my $s = Rutf8 $Lex->{sampleText}{Adv}; # Ascii
2358             my $p = create K(address, $s), operators => \&printOperatorSequence;
2359            
2360             K(address, $s)->printOutZeroString;
2361             $p->dumpParseTree;
2362             $p->print;
2363            
2364             $p->traverseParseTree; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2365              
2366            
2367             Assemble(debug => 0, eq => <<END);
2368             𝗮𝗮𝑒𝑞𝑢𝑎𝑙𝑠abc 123 𝐩𝐥𝐮𝐬𝘃𝗮𝗿
2369             Tree at: 0000 0000 0000 10D8 length: 0000 0000 0000 000B
2370             Keys: 0000 1118 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2371             Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0F18 0000 0009 0000 0AD8 0000 0009 0000 0004 0000 0006 0000 0002 0000 0005 0041 2A7C 0000 0003 0000 0009
2372             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2373             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2374             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2375             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 2A7C
2376             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0005
2377             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0002
2378             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0006
2379             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0004
2380             index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2381             index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0AD8 subTree
2382             index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2383             index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0F18 subTree
2384             Tree at: 0000 0000 0000 0AD8 length: 0000 0000 0000 0007
2385             Keys: 0000 0B18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2386             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0000 0000 0006 0041 1B34 0000 0001 0000 0009
2387             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2388             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2389             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2390             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 1B34
2391             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2392             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0000
2393             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0002
2394             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0000
2395             end
2396             Tree at: 0000 0000 0000 0F18 length: 0000 0000 0000 000B
2397             Keys: 0000 0F58 0500 000B 0000 0000 0000 0000 0000 0000 0000 000D 0000 000C 0000 0009 0000 0008 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2398             Data: 0000 0000 0000 0016 0000 0000 0000 0000 0000 0000 0000 0DD8 0000 0009 0000 0C18 0000 0009 0000 0003 0000 0004 0000 0013 0000 0003 0041 3220 0000 0003 0000 0009
2399             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2400             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2401             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0003
2402             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 3220
2403             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0003
2404             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0013
2405             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0004
2406             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0003
2407             index: 0000 0000 0000 0007 key: 0000 0000 0000 0008 data: 0000 0000 0000 0009
2408             index: 0000 0000 0000 0008 key: 0000 0000 0000 0009 data: 0000 0000 0000 0C18 subTree
2409             index: 0000 0000 0000 0009 key: 0000 0000 0000 000C data: 0000 0000 0000 0009
2410             index: 0000 0000 0000 000A key: 0000 0000 0000 000D data: 0000 0000 0000 0DD8 subTree
2411             Tree at: 0000 0000 0000 0C18 length: 0000 0000 0000 0007
2412             Keys: 0000 0C58 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2413             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0001 0000 0007 0000 0008 0000 0002 0041 5806 0000 0001 0000 0009
2414             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2415             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2416             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2417             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 5806
2418             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0002
2419             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0008
2420             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0007
2421             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0001
2422             end
2423             Tree at: 0000 0000 0000 0DD8 length: 0000 0000 0000 0007
2424             Keys: 0000 0E18 0000 0007 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0007 0000 0006 0000 0005 0000 0004 0000 0002 0000 0001 0000 0000
2425             Data: 0000 0000 0000 000E 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0002 0000 0003 0000 0017 0000 0006 0041 1B34 0000 0001 0000 0009
2426             Node: 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
2427             index: 0000 0000 0000 0000 key: 0000 0000 0000 0000 data: 0000 0000 0000 0009
2428             index: 0000 0000 0000 0001 key: 0000 0000 0000 0001 data: 0000 0000 0000 0001
2429             index: 0000 0000 0000 0002 key: 0000 0000 0000 0002 data: 0000 0000 0041 1B34
2430             index: 0000 0000 0000 0003 key: 0000 0000 0000 0004 data: 0000 0000 0000 0006
2431             index: 0000 0000 0000 0004 key: 0000 0000 0000 0005 data: 0000 0000 0000 0017
2432             index: 0000 0000 0000 0005 key: 0000 0000 0000 0006 data: 0000 0000 0000 0003
2433             index: 0000 0000 0000 0006 key: 0000 0000 0000 0007 data: 0000 0000 0000 0002
2434             end
2435             end
2436             end
2437             Assign: 𝑒𝑞𝑢𝑎𝑙𝑠
2438             Term
2439             Variable: 𝗮𝗮
2440             Term
2441             Dyad: 𝐩𝐥𝐮𝐬
2442             Term
2443             Ascii: abc 123
2444             Term
2445             Variable: 𝘃𝗮𝗿
2446             variable
2447             ascii
2448             variable
2449             plus
2450             equals
2451             END
2452            
2453             my $s = Rutf8 $Lex->{sampleText}{ws};
2454             my $p = create (K(address, $s), operators => \&printOperatorSequence);
2455            
2456             K(address, $s)->printOutZeroString; # Print input string
2457             $p->print; # Print parse
2458            
2459             $p->traverseParseTree; # Traverse tree printing terms # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2460              
2461            
2462             Assemble(debug => 0, eq => <<END);
2463             𝗮𝑎𝑠𝑠𝑖𝑔𝑛⌊〈❨𝗯𝗽❩〉𝐩𝐥𝐮𝐬❪𝘀𝗰❫⌋⟢𝗮𝗮𝑎𝑠𝑠𝑖𝑔𝑛❬𝗯𝗯𝐩𝐥𝐮𝐬𝗰𝗰❭⟢
2464             Semicolon
2465             Term
2466             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2467             Term
2468             Variable: 𝗮
2469             Term
2470             Brackets: ⌊⌋
2471             Term
2472             Term
2473             Dyad: 𝐩𝐥𝐮𝐬
2474             Term
2475             Brackets: ❨❩
2476             Term
2477             Term
2478             Brackets: ❬❭
2479             Term
2480             Term
2481             Variable: 𝗯𝗽
2482             Term
2483             Brackets: ❰❱
2484             Term
2485             Term
2486             Variable: 𝘀𝗰
2487             Term
2488             Assign: 𝑎𝑠𝑠𝑖𝑔𝑛
2489             Term
2490             Variable: 𝗮𝗮
2491             Term
2492             Brackets: ❴❵
2493             Term
2494             Term
2495             Dyad: 𝐩𝐥𝐮𝐬
2496             Term
2497             Variable: 𝗯𝗯
2498             Term
2499             Variable: 𝗰𝗰
2500             variable
2501             variable
2502             variable
2503             plus
2504             assign
2505             variable
2506             variable
2507             variable
2508             plus
2509             assign
2510             semiColon
2511             END
2512            
2513              
2514             =head1 Print
2515              
2516             Print a parse tree
2517              
2518             =head2 print($parse)
2519              
2520             Print a parse tree.
2521              
2522             Parameter Description
2523             1 $parse Parse tree
2524              
2525             B<Example:>
2526              
2527              
2528            
2529             create (K(address, Rutf8 $Lex->{sampleText}{vav}))->print; # Create parse tree from source terminated with zero # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2530              
2531            
2532             ok Assemble(debug => 0, eq => <<END);
2533             Assign: 𝑎
2534             Term
2535             Variable: 𝗮
2536             Term
2537             Variable: 𝗯
2538             END
2539            
2540              
2541             =head2 dumpParseTree($parse)
2542              
2543             Dump the parse tree.
2544              
2545             Parameter Description
2546             1 $parse Parse tree
2547              
2548             =head1 Execute
2549              
2550             Associate methods with each operator via a set of quarks describing the method to be called for each lexical operator.
2551              
2552             =head2 lexToSub($parse, $alphabet, $op, $sub)
2553              
2554             Map a lexical item to a processing subroutine.
2555              
2556             Parameter Description
2557             1 $parse Sub quarks
2558             2 $alphabet The alphabet number
2559             3 $op The operator name in that alphabet
2560             4 $sub Subroutine definition
2561              
2562             =head2 dyad($parse, $text, $sub)
2563              
2564             Define a method for a dyadic operator.
2565              
2566             Parameter Description
2567             1 $parse Sub quarks
2568             2 $text The name of the operator as a utf8 string
2569             3 $sub Associated subroutine definition
2570              
2571             =head2 assign($parse, $text, $sub)
2572              
2573             Define a method for an assign operator.
2574              
2575             Parameter Description
2576             1 $parse Sub quarks
2577             2 $text The name of the operator as a utf8 string
2578             3 $sub Associated subroutine definition
2579              
2580             =head2 prefix($parse, $text, $sub)
2581              
2582             Define a method for a prefix operator.
2583              
2584             Parameter Description
2585             1 $parse Sub quarks
2586             2 $text The name of the operator as a utf8 string
2587             3 $sub Associated subroutine definition
2588              
2589             =head2 suffix($parse, $text, $sub)
2590              
2591             Define a method for a suffix operator.
2592              
2593             Parameter Description
2594             1 $parse Sub quarks
2595             2 $text The name of the operator as a utf8 string
2596             3 $sub Associated subroutine definition
2597              
2598             =head2 ascii($parse, $sub)
2599              
2600             Define a method for ascii text.
2601              
2602             Parameter Description
2603             1 $parse Sub quarks
2604             2 $sub Associated subroutine definition
2605              
2606             =head2 semiColon($parse, $sub)
2607              
2608             Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
2609              
2610             Parameter Description
2611             1 $parse Sub quarks
2612             2 $sub Associated subroutine definition
2613              
2614             =head2 variable($parse, $sub)
2615              
2616             Define a method for a variable.
2617              
2618             Parameter Description
2619             1 $parse Sub quarks
2620             2 $sub Associated subroutine definition
2621              
2622             =head2 bracket($parse, $open, $sub)
2623              
2624             Define a method for a bracket operator.
2625              
2626             Parameter Description
2627             1 $parse Sub quarks
2628             2 $open Opening parenthesis
2629             3 $sub Associated subroutine
2630              
2631             =head1 Alphabets
2632              
2633             Translate between alphabets.
2634              
2635             =head2 asciiToAssignLatin($in)
2636              
2637             Translate ascii to the corresponding letters in the assign latin alphabet.
2638              
2639             Parameter Description
2640             1 $in A string of ascii
2641              
2642             =head2 asciiToAssignGreek($in)
2643              
2644             Translate ascii to the corresponding letters in the assign greek alphabet.
2645              
2646             Parameter Description
2647             1 $in A string of ascii
2648              
2649             =head2 asciiToDyadLatin($in)
2650              
2651             Translate ascii to the corresponding letters in the dyad latin alphabet.
2652              
2653             Parameter Description
2654             1 $in A string of ascii
2655              
2656             =head2 asciiToDyadGreek($in)
2657              
2658             Translate ascii to the corresponding letters in the dyad greek alphabet.
2659              
2660             Parameter Description
2661             1 $in A string of ascii
2662              
2663             =head2 asciiToPrefixLatin($in)
2664              
2665             Translate ascii to the corresponding letters in the prefix latin alphabet.
2666              
2667             Parameter Description
2668             1 $in A string of ascii
2669              
2670             =head2 asciiToPrefixGreek($in)
2671              
2672             Translate ascii to the corresponding letters in the prefix greek alphabet.
2673              
2674             Parameter Description
2675             1 $in A string of ascii
2676              
2677             =head2 asciiToSuffixLatin($in)
2678              
2679             Translate ascii to the corresponding letters in the suffix latin alphabet.
2680              
2681             Parameter Description
2682             1 $in A string of ascii
2683              
2684             =head2 asciiToSuffixGreek($in)
2685              
2686             Translate ascii to the corresponding letters in the suffix greek alphabet.
2687              
2688             Parameter Description
2689             1 $in A string of ascii
2690              
2691             =head2 asciiToVariableLatin($in)
2692              
2693             Translate ascii to the corresponding letters in the variable latin alphabet.
2694              
2695             Parameter Description
2696             1 $in A string of ascii
2697              
2698             =head2 asciiToVariableGreek($in)
2699              
2700             Translate ascii to the corresponding letters in the variable greek alphabet.
2701              
2702             Parameter Description
2703             1 $in A string of ascii
2704              
2705             =head2 asciiToEscaped($in)
2706              
2707             Translate ascii to the corresponding letters in the escaped ascii alphabet.
2708              
2709             Parameter Description
2710             1 $in A string of ascii
2711              
2712             =head2 semiColonChar()
2713              
2714             Translate ascii to the corresponding letters in the escaped ascii alphabet.
2715              
2716              
2717             =head2 printOperatorSequence($parse)
2718              
2719             Print the operator calling sequence.
2720              
2721             Parameter Description
2722             1 $parse Parse
2723              
2724             =head2 executeOperator($parse)
2725              
2726             Print the operator calling sequence.
2727              
2728             Parameter Description
2729             1 $parse Parse
2730              
2731              
2732             =head1 Hash Definitions
2733              
2734              
2735              
2736              
2737             =head2 Unisyn::Parse Definition
2738              
2739              
2740             Description of parse
2741              
2742              
2743              
2744              
2745             =head3 Output fields
2746              
2747              
2748             =head4 address8
2749              
2750             Address of source string as utf8
2751              
2752             =head4 arena
2753              
2754             Arena containing tree
2755              
2756             =head4 fails
2757              
2758             Number of failures encountered in this parse
2759              
2760             =head4 operators
2761              
2762             Methods implementing each lexical operator
2763              
2764             =head4 parse
2765              
2766             Offset to the head of the parse tree
2767              
2768             =head4 quarks
2769              
2770             Quarks representing the strings used in this parse
2771              
2772             =head4 size8
2773              
2774             Size of source string as utf8
2775              
2776             =head4 source32
2777              
2778             Source text as utf32
2779              
2780             =head4 sourceLength32
2781              
2782             Length of utf32 string
2783              
2784             =head4 sourceSize32
2785              
2786             Size of utf32 allocation
2787              
2788              
2789              
2790             =head1 Private Methods
2791              
2792             =head2 getAlpha($register, $address, $index)
2793              
2794             Load the position of a lexical item in its alphabet from the current character.
2795              
2796             Parameter Description
2797             1 $register Register to load
2798             2 $address Address of start of string
2799             3 $index Index into string
2800              
2801             =head2 getLexicalCode($register, $address, $index)
2802              
2803             Load the lexical code of the current character in memory into the specified register.
2804              
2805             Parameter Description
2806             1 $register Register to load
2807             2 $address Address of start of string
2808             3 $index Index into string
2809              
2810             =head2 putLexicalCode($register, $address, $index, $code)
2811              
2812             Put the specified lexical code into the current character in memory.
2813              
2814             Parameter Description
2815             1 $register Register used to load code
2816             2 $address Address of string
2817             3 $index Index into string
2818             4 $code Code to put
2819              
2820             =head2 loadCurrentChar()
2821              
2822             Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
2823              
2824              
2825             =head2 checkStackHas($depth)
2826              
2827             Check that we have at least the specified number of elements on the stack.
2828              
2829             Parameter Description
2830             1 $depth Number of elements required on the stack
2831              
2832             =head2 pushElement()
2833              
2834             Push the current element on to the stack.
2835              
2836              
2837             =head2 pushEmpty()
2838              
2839             Push the empty element on to the stack.
2840              
2841              
2842             =head2 lexicalNameFromLetter($l)
2843              
2844             Lexical name for a lexical item described by its letter.
2845              
2846             Parameter Description
2847             1 $l Letter of the lexical item
2848              
2849             =head2 lexicalNumberFromLetter($l)
2850              
2851             Lexical number for a lexical item described by its letter.
2852              
2853             Parameter Description
2854             1 $l Letter of the lexical item
2855              
2856             =head2 lexicalItemLength($source32, $offset)
2857              
2858             Put the length of a lexical item into variable B<size>.
2859              
2860             Parameter Description
2861             1 $source32 B<address> of utf32 source representation
2862             2 $offset B<offset> to lexical item in utf32
2863              
2864             =head2 new($depth, $description)
2865              
2866             Create a new term in the parse tree rooted on the stack.
2867              
2868             Parameter Description
2869             1 $depth Stack depth to be converted
2870             2 $description Text reason why we are creating a new term
2871              
2872             =head2 error($message)
2873              
2874             Write an error message and stop.
2875              
2876             Parameter Description
2877             1 $message Error message
2878              
2879             =head2 testSet($set, $register)
2880              
2881             Test a set of items, setting the Zero Flag if one matches else clear the Zero flag.
2882              
2883             Parameter Description
2884             1 $set Set of lexical letters
2885             2 $register Register to test
2886              
2887             =head2 checkSet($set)
2888              
2889             Check that one of a set of items is on the top of the stack or complain if it is not.
2890              
2891             Parameter Description
2892             1 $set Set of lexical letters
2893              
2894             =head2 reduce($priority)
2895              
2896             Convert the longest possible expression on top of the stack into a term at the specified priority.
2897              
2898             Parameter Description
2899             1 $priority Priority of the operators to reduce
2900              
2901             =head2 reduceMultiple($priority)
2902              
2903             Reduce existing operators on the stack.
2904              
2905             Parameter Description
2906             1 $priority Priority of the operators to reduce
2907              
2908             =head2 accept_a()
2909              
2910             Assign.
2911              
2912              
2913             =head2 accept_b()
2914              
2915             Open.
2916              
2917              
2918             =head2 accept_B()
2919              
2920             Closing parenthesis.
2921              
2922              
2923             =head2 accept_d()
2924              
2925             Infix but not assign or semi-colon.
2926              
2927              
2928             =head2 accept_p()
2929              
2930             Prefix.
2931              
2932              
2933             =head2 accept_q()
2934              
2935             Post fix.
2936              
2937              
2938             =head2 accept_s()
2939              
2940             Semi colon.
2941              
2942              
2943             =head2 accept_v()
2944              
2945             Variable.
2946              
2947              
2948             =head2 parseExpression()
2949              
2950             Parse the string of classified lexical items addressed by register $start of length $length. The resulting parse tree (if any) is returned in r15.
2951              
2952              
2953             =head2 MatchBrackets(@parameters)
2954              
2955             Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket. Opening brackets have even codes from 0x10 to 0x4e while the corresponding closing bracket has a code one higher.
2956              
2957             Parameter Description
2958             1 @parameters Parameters
2959              
2960             =head2 ClassifyNewLines(@parameters)
2961              
2962             Scan input string looking for opportunities to convert new lines into semi colons.
2963              
2964             Parameter Description
2965             1 @parameters Parameters
2966              
2967             =head2 ClassifyWhiteSpace(@parameters)
2968              
2969             Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
2970              
2971             Parameter Description
2972             1 @parameters Parameters
2973              
2974             =head2 reload($parse, $parameters)
2975              
2976             Reload the variables associated with a parse.
2977              
2978             Parameter Description
2979             1 $parse Parse
2980             2 $parameters Hash of variable parameters
2981              
2982             =head2 parseUtf8($parse, @parameters)
2983              
2984             Parse a unisyn expression encoded as utf8 and return the parse tree.
2985              
2986             Parameter Description
2987             1 $parse Parse
2988             2 @parameters Parameters
2989              
2990             =head2 printLexicalItem($parse, $source32, $offset, $size)
2991              
2992             Print the utf8 string corresponding to a lexical item at a variable offset.
2993              
2994             Parameter Description
2995             1 $parse Parse tree
2996             2 $source32 B<address> of utf32 source representation
2997             3 $offset B<offset> to lexical item in utf32
2998             4 $size B<size> in utf32 chars of item
2999              
3000             =head2 showAlphabet($alphabet)
3001              
3002             Show an alphabet.
3003              
3004             Parameter Description
3005             1 $alphabet Alphabet name
3006              
3007             =head2 T($key, $expected, %options)
3008              
3009             Parse some text and dump the results.
3010              
3011             Parameter Description
3012             1 $key Key of text to be parsed
3013             2 $expected Expected result
3014             3 %options Options
3015              
3016             =head2 C($key, $expected, %options)
3017              
3018             Parse some text and print the results.
3019              
3020             Parameter Description
3021             1 $key Key of text to be parsed
3022             2 $expected Expected result
3023             3 %options Options
3024              
3025              
3026             =head1 Index
3027              
3028              
3029             1 L - Assign.
3030              
3031             2 L - Closing parenthesis.
3032              
3033             3 L - Open.
3034              
3035             4 L - Infix but not assign or semi-colon.
3036              
3037             5 L - Prefix.
3038              
3039             6 L - Post fix.
3040              
3041             7 L - Semi colon.
3042              
3043             8 L - Variable.
3044              
3045             9 L - Define a method for ascii text.
3046              
3047             10 L - Translate ascii to the corresponding letters in the assign greek alphabet.
3048              
3049             11 L - Translate ascii to the corresponding letters in the assign latin alphabet.
3050              
3051             12 L - Translate ascii to the corresponding letters in the dyad greek alphabet.
3052              
3053             13 L - Translate ascii to the corresponding letters in the dyad latin alphabet.
3054              
3055             14 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3056              
3057             15 L - Translate ascii to the corresponding letters in the prefix greek alphabet.
3058              
3059             16 L - Translate ascii to the corresponding letters in the prefix latin alphabet.
3060              
3061             17 L - Translate ascii to the corresponding letters in the suffix greek alphabet.
3062              
3063             18 L - Translate ascii to the corresponding letters in the suffix latin alphabet.
3064              
3065             19 L - Translate ascii to the corresponding letters in the variable greek alphabet.
3066              
3067             20 L - Translate ascii to the corresponding letters in the variable latin alphabet.
3068              
3069             21 L - Define a method for an assign operator.
3070              
3071             22 L - Define a method for a bracket operator.
3072              
3073             23 L - Parse some text and print the results.
3074              
3075             24 L - Check that one of a set of items is on the top of the stack or complain if it is not.
3076              
3077             25 L - Check that we have at least the specified number of elements on the stack.
3078              
3079             26 L - Scan input string looking for opportunities to convert new lines into semi colons.
3080              
3081             27 L - Classify white space per: "lib/Unisyn/whiteSpace/whiteSpaceClassification.pl".
3082              
3083             28 L - Create a new unisyn parse from a utf8 string.
3084              
3085             29 L - Dump the parse tree.
3086              
3087             30 L - Define a method for a dyadic operator.
3088              
3089             31 L - Write an error message and stop.
3090              
3091             32 L - Print the operator calling sequence.
3092              
3093             33 L - Load the position of a lexical item in its alphabet from the current character.
3094              
3095             34 L - Load the lexical code of the current character in memory into the specified register.
3096              
3097             35 L - Put the length of a lexical item into variable B<size>.
3098              
3099             36 L - Lexical name for a lexical item described by its letter.
3100              
3101             37 L - Lexical number for a lexical item described by its letter.
3102              
3103             38 L - Map a lexical item to a processing subroutine.
3104              
3105             39 L - Load the details of the character currently being processed so that we have the index of the character in the upper half of the current character and the lexical type of the character in the lowest byte.
3106              
3107             40 L - Replace the low three bytes of a utf32 bracket character with 24 bits of offset to the matching opening or closing bracket.
3108              
3109             41 L - Create a new term in the parse tree rooted on the stack.
3110              
3111             42 L - Parse the string of classified lexical items addressed by register $start of length $length.
3112              
3113             43 L - Parse a unisyn expression encoded as utf8 and return the parse tree.
3114              
3115             44 L - Define a method for a prefix operator.
3116              
3117             45 L - Print a parse tree.
3118              
3119             46 L - Print the utf8 string corresponding to a lexical item at a variable offset.
3120              
3121             47 L - Print the operator calling sequence.
3122              
3123             48 L - Push the current element on to the stack.
3124              
3125             49 L - Push the empty element on to the stack.
3126              
3127             50 L - Put the specified lexical code into the current character in memory.
3128              
3129             51 L - Convert the longest possible expression on top of the stack into a term at the specified priority.
3130              
3131             52 L - Reduce existing operators on the stack.
3132              
3133             53 L - Reload the variables associated with a parse.
3134              
3135             54 L - Define a method for the semicolon operator which comes in two forms: the explicit semi colon and a new line semicolon.
3136              
3137             55 L - Translate ascii to the corresponding letters in the escaped ascii alphabet.
3138              
3139             56 L - Show an alphabet.
3140              
3141             57 L - Define a method for a suffix operator.
3142              
3143             58 L - Parse some text and dump the results.
3144              
3145             59 L - Test a set of items, setting the Zero Flag if one matches else clear the Zero flag.
3146              
3147             60 L - Traverse the terms in parse tree in post order and call the operator subroutine associated with each term.
3148              
3149             61 L - Define a method for a variable.
3150              
3151             =head1 Installation
3152              
3153             This module is written in 100% Pure Perl and, thus, it is easy to read,
3154             comprehend, use, modify and install via B<cpan>:
3155              
3156             sudo cpan install Unisyn::Parse
3157              
3158             =head1 Author
3159              
3160             L
3161              
3162             L
3163              
3164             =head1 Copyright
3165              
3166             Copyright (c) 2016-2021 Philip R Brenan.
3167              
3168             This module is free software. It may be used, redistributed and/or modified
3169             under the same terms as Perl itself.
3170              
3171             =cut
3172              
3173              
3174              
3175             # Tests and documentation
3176              
3177             sub test
3178 1     1 0 6 {my $p = __PACKAGE__;
3179 1         8 binmode($_, ":utf8") for *STDOUT, *STDERR;
3180 1 50       62 return if eval "eof(${p}::DATA)";
3181 1         52 my $s = eval "join('', <${p}::DATA>)";
3182 1 50       22 $@ and die $@;
3183 1 0   1 1 6 eval $s;
  1     1 1 2  
  1     0 1 6  
  1     0 1 633  
  1     0   58283  
  1     0   7  
  1     0   126  
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
  0            
3184 0 0         $@ and die $@;
3185 0           1
3186             }
3187              
3188             test unless caller;
3189              
3190             1;
3191             # podDocumentation
3192             __DATA__