File Coverage

blib/lib/Simd/Avx512.pm
Criterion Covered Total %
statement 269 303 88.7
branch 113 154 73.3
condition 29 63 46.0
subroutine 66 82 80.4
pod 35 59 59.3
total 512 661 77.4


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/
2             #-------------------------------------------------------------------------------
3             # Simd::Avx512 - Emulate SIMD instructions
4             # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             package Simd::Avx512;
8             our $VERSION = 20210128;
9 1     1   1729 use warnings FATAL => qw(all);
  1         7  
  1         39  
10 1     1   6 use strict;
  1         2  
  1         30  
11 1     1   6 use Carp;
  1         2  
  1         86  
12 1     1   578 use Data::Dump qw(dump);
  1         7708  
  1         66  
13 1     1   10 use feature qw(say current_sub);
  1         2  
  1         5104  
14              
15             my $develop = -e q(/home/phil/); # Development mode: true when /home/phil/ exists which switches on the element size checks guarded by "if $develop" in the emulation subs
16              
17             sub repeat($$) # Repeat a string the specified number of times and return the concatenated result
18 404     404 0 660 {my ($string, $repeat) = @_; # String to repeat, number of repetitions
19 404         2666 $string x $repeat # The x operator concatenates $repeat copies of $string
20             }
21              
22 157     157 0 236 sub zByte {repeat('0', 8)} # Zero byte: a string of 8 '0' characters
23 0     0 0 0 sub zWord {repeat('0', 16)} # Zero word: a string of 16 '0' characters
24 0     0 0 0 sub zDWord {repeat('0', 32)} # Zero double word: a string of 32 '0' characters
25 90     90 0 172 sub zQWord {repeat('0', 64)} # Zero quad word: a string of 64 '0' characters
26              
27             sub zBytes($) # String of zero bytes of specified length: 8 '0' characters for each byte requested
28 157     157 0 246 {my ($length) = @_; # Number of zero bytes required
29 157         260 repeat(zByte, $length) # Repeat the 8 character zero byte $length times
30             }
31              
32             sub zWords($) # String of zero words of specified length: 16 '0' characters for each word requested
33 0     0 0 0 {my ($length) = @_; # Number of zero words required
34 0         0 repeat(zWord, $length) # Repeat the 16 character zero word $length times
35             }
36              
37             sub zDWords($) # String of zero double words of specified length: 32 '0' characters for each double word requested
38 0     0 0 0 {my ($length) = @_; # Number of zero double words required
39 0         0 repeat(zDWord, $length) # Repeat the 32 character zero double word $length times
40             }
41              
42             sub zQWords($) # String of zero quad words of specified length: 64 '0' characters for each quad word requested
43 0     0 0 0 {my ($length) = @_; # Number of zero quad words required
44 0         0 repeat(zQWord, $length) # Repeat the 64 character zero quad word $length times
45             }
46              
47             sub byte($) # A byte with the specified value: the value written as a string of 8 binary digits
48 0     0 0 0 {my ($value) = @_; # Value of the byte
49 0 0 0     0 confess "0 - 2**8 required ($value)" unless $value >= 0 and $value < 2**8; # Accepts 0 up to and including 2**8-1: the upper bound in the message is exclusive
50 0         0 sprintf("%08b", $value) # Binary digits zero padded on the left to a width of 8
51             }
52              
53             sub word($) # A word with the specified value: the value written as a string of 16 binary digits
54 0     0 0 0 {my ($value) = @_; # Value of the word
55 0 0 0     0 confess "0 - 2**16 required ($value)" unless $value >= 0 and $value < 2**16; # Accepts 0 up to and including 2**16-1: the upper bound in the message is exclusive
56 0         0 sprintf("%016b", $value) # Binary digits zero padded on the left to a width of 16
57             }
58              
59             sub dWord($) # A double word with the specified value: the value written as a string of 32 binary digits
60 0     0 0 0 {my ($value) = @_; # Value of the double word
61 0 0 0     0 confess "0 - 2**32 required ($value)" unless $value >= 0 and $value < 2**32; # Accepts 0 up to and including 2**32-1: the upper bound in the message is exclusive
62 0         0 sprintf("%032b", $value) # Binary digits zero padded on the left to a width of 32
63             }
64              
65             sub qWord($) # A quad word with the specified value: the value written as a string of 64 binary digits
66 0     0 0 0 {my ($value) = @_; # Value of the quad word
67 0 0 0     0 confess "0 - 2**64 required ($value)" unless $value >= 0 and $value < 2**64; # Accepts 0 up to but excluding 2**64 - NOTE(review): presumably relies on a perl built with 64 bit integers for the largest values - confirm
68 0         0 sprintf("%064b", $value) # Binary digits zero padded on the left to a width of 64
69             }
70              
71 90     90 0 155 sub maskRegister {zQWord} # Mask register set to zero: a string of 64 '0' characters as produced by zQWord
72              
73             sub require8or16or32or64($) # Check that we have a size of 8|16|32|64 bits and confess with a stack trace if not
74 0     0 0 0 {my ($size) = @_; # Size to check
75 0 0 0     0 confess "8|16|32|64 required for operand ($size)" unless $size == 8 or $size == 16 or $size == 32 or $size == 64; # Numeric comparison against each permitted element size
      0        
      0        
76             }
77              
78             sub require64($) # Check that we have a string of 64 bits: confess if the operand is undefined or is not 64 characters long or contains anything other than zeros and ones
79 56     56 0 95 {my ($xmm) = @_; # Bytes
80 56 50       111 defined($xmm) or confess; # Reject an undefined operand before its length is taken
81 56         83 my $l = length $xmm; # One character per bit so the length is the number of bits
82 56 50       110 confess "64 bits required for operand ($l)" unless $l == 64;
83 56 50       240 confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); # NOTE(review): \Z also matches just before a final newline so 63 bits followed by a newline would pass both checks - \z would be stricter
84             }
85              
86             sub require128($) # Check that we have a string of 128 bits: confess if the operand is not 128 characters long or contains anything other than zeros and ones
87 42     42 0 68 {my ($xmm) = @_; # Bytes
88 42         61 my $l = length $xmm; # One character per bit - NOTE(review): unlike require64 there is no explicit defined check before the length is taken
89 42 50       78 confess "128 bits required for operand ($l)" unless $l == 128;
90 42 50       158 confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); # NOTE(review): \Z also matches just before a final newline so 127 bits followed by a newline would pass both checks - \z would be stricter
91             }
92              
93             sub require128or256or512($;$) # Check that we have a string of 128|256|512 bits in the first operand and optionally the same in the second operand: only lengths are checked here - the characters are not verified to be zeros and ones
94 98     98 0 184 {my ($xmm1, $xmm2) = @_; # Bytes, optional bytes
95 98         141 my $l = length $xmm1; # Length of the first operand in bits
96 98 50 100     385 confess "128|256|512 bits required for first operand ($l)" unless $l == 128 or $l == 256 or $l == 512;
      66        
97 98 100       196 if (defined $xmm2) # The second operand is only checked when it is supplied
98 92         125 {my $m = length $xmm2; # Length of the second operand in bits
99 92 50 100     293 confess "128|256|512 bits required for second operand ($m)" unless $m == 128 or $m == 256 or $m == 512;
      66        
100 92 50       201 confess "Operands must have same length($l,$m)" unless $l == $m; # Both operands must be the same width
101             }
102             }
103              
104             sub require64or128or256or512($) # Check that we have a string of 64|128|256|512 bits: confess if the length is anything else or the operand contains anything other than zeros and ones
105 3     3 0 7 {my ($xmm) = @_; # Bytes
106 3         5 my $l = length $xmm; # One character per bit so the length is the number of bits
107 3 0 33     11 confess "64|128|256|512 bits required for operand" unless $l == 64 or $l == 128 or $l == 256 or $l == 512;
      33        
      0        
108 3 50       14 confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); # NOTE(review): \Z also matches just before a final newline - \z would be stricter
109             }
110              
111             sub requireSameLength($$) # Check that the two operands have the same length and return that length else confess
112 448     448 0 625 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
113 448         659 my ($l, $L) = (length($xmm1), length($xmm2)); # Lengths of the first and second operands
114 448 50       760 confess "Operands have different lengths($l, $L)" unless $l == $L;
115 448         999 $l # The common length is the return value so that callers can loop over it
116             }
117              
118             sub flipBitsUnderMask($$) # Flip the bits in a string where the corresponding mask bit is 1 else leave the bit as is and return the resulting string
119 29     29 0 68 {my ($string, $mask) = @_; # Bit string, mask
120 29         61 my $l = requireSameLength $string, $mask; # Confess unless the string and mask have the same length
121 29         47 my $f = ''; # Result accumulated one character at a time
122 29         106 for my $i(0..$l-1) # Each character in the string and mask
123 1636         2092 {my $s = substr($string, $i, 1); # Current character of the string
124 1636 100       2911 $f .= substr($mask, $i, 1) eq '0' ? $s : $s eq '0' ? '1' : '0' # Mask character 0 copies the character unchanged: any other mask character inverts it
    100          
125             }
126             $f # Return the flipped string
127 29         149 }
128              
129             sub compareTwosComplement($$) # Compare two numbers in two's complement formats and return -1 if the first number is less than the second, 0 if they are equal, else +1
130 103     103 0 258 {my ($a, $b) = @_; # First, second
131 103         301 my $n = requireSameLength $a, $b; # Both operands must be the same width: $n is that width in characters
132              
133 103 100 100     290 return -1 if substr($a, 0, 1) eq '1' and substr($b, 0, 1) eq '0'; # Leading sign bit: a negative first operand is less than a non negative second operand
134 88 100 100     290 return +1 if substr($a, 0, 1) eq '0' and substr($b, 0, 1) eq '1'; # A non negative first operand is greater than a negative second operand
135              
136 81         137 for(1..$n) # Non sign bits: the signs are equal here so the first differing character from the left decides - NOTE(review): index $n is one past the last character where substr yields an empty string that matches neither test so 1..$n-1 would suffice
137 701 100 100     1682 {return -1 if substr($a, $_, 1) eq '0' and substr($b, $_, 1) eq '1';
138 691 100 100     1477 return +1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '0';
139             }
140             0 # Equal
141 53         169 }
142              
143             #D1 Instructions # Emulation of Avx512 instructions
144              
145             sub PSLLDQ($$) # Packed Shift Left Logical DoubleQword: shift a 128 bit string left by a number of bytes filling with zeros on the right
146 8     8 1 22 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
147 8         17 require128 $xmm1; # Check that we have a string of 128 bits
148 8         21 substr($xmm1, $imm8 * 8).zBytes($imm8) # Drop the leading $imm8 bytes and append the same number of zero bytes - NOTE(review): the shift amount is not range checked in this sub
149             }
150              
151             sub VPSLLDQ($$) # Packed Shift Left Logical DoubleQword: apply PSLLDQ separately to each 128 bit section of a 128|256|512 bit string
152 3     3 1 7 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
153 3         10 require128or256or512 $xmm1; # Check that we have a string of 128|256|512 bits
154 3 50 33     12 confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16; # The shift amount is validated here before any section is shifted
155              
156 3 100       12 return PSLLDQ($xmm1, $imm8) if length($xmm1) == 128; # One section
157              
158 2 100       8 return PSLLDQ(substr($xmm1, 0, 128), $imm8). # Two sections each shifted independently then concatenated in their original order
159             PSLLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
160              
161 1         5 return PSLLDQ(substr($xmm1, 0, 128), $imm8). # Otherwise the length is 512: four sections each shifted independently
162             PSLLDQ(substr($xmm1, 128, 128), $imm8).
163             PSLLDQ(substr($xmm1, 256, 128), $imm8).
164             PSLLDQ(substr($xmm1, 384, 128), $imm8)
165             }
166              
167             sub PSRLDQ($$) # Packed Shift Right Logical DoubleQword: shift a 128 bit string right by a number of bytes filling with zeros on the left
168 18     18 1 45 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
169 18         39 require128 $xmm1; # Check that we have a string of 128 bits
170 18         39 zBytes($imm8).substr($xmm1, 0, 128 - $imm8 * 8) # Prefix $imm8 zero bytes and keep only the leading characters that still fit in 128 - NOTE(review): the shift amount is not range checked in this sub
171             }
172              
173             sub VPSRLDQ($$) # Packed Shift Right Logical DoubleQword: apply PSRLDQ separately to each 128 bit section of a 128|256|512 bit string
174 3     3 1 7 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
175 3         9 require128or256or512 $xmm1; # Check that we have a string of 128|256|512 bits
176 3 50 33     19 confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16; # The shift amount is validated here before any section is shifted
177              
178 3 100       10 return PSRLDQ($xmm1, $imm8) if length($xmm1) == 128; # One section
179              
180 2 100       9 return PSRLDQ(substr($xmm1, 0, 128), $imm8). # Two sections each shifted independently then concatenated in their original order
181             PSRLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
182              
183 1         4 return PSRLDQ(substr($xmm1, 0, 128), $imm8). # Otherwise the length is 512: four sections each shifted independently
184             PSRLDQ(substr($xmm1, 128, 128), $imm8).
185             PSRLDQ(substr($xmm1, 256, 128), $imm8).
186             PSRLDQ(substr($xmm1, 384, 128), $imm8)
187             }
188              
189             #D1 PCMP # Packed CoMPare
190             #D2 PCMPEQ # Packed CoMPare EQual
191              
192             sub pcmpeq($$$) #P Packed CoMPare EQual: compare corresponding elements of two 128 bit strings and return a 128 bit string in which each equal element is all ones and each unequal element is all zeros
193 4     4 1 9 {my ($size, $xmm1, $xmm2) = @_; # Size in bits, element, element
194              
195 4 50       12 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
196 4         10 require128 $xmm1; # Check that we have a string of 128 bits in the first operand
197 4         10 require128 $xmm2; # Check that we have a string of 128 bits in the second operand
198 4         13 requireSameLength $xmm1, $xmm2; # Check operands have the same length
199              
200 4         10 my $N = 128 / $size; # Number of elements in operation
201 4         10 my $clear = '0' x $size; # Element value for a failed comparison
202 4         9 my $set = '1' x $size; # Element value for a successful comparison
203 4         10 my $xmm3 = zBytes $N; # Result - NOTE(review): this is $N*8 characters which is only 128 when $size is 8 so for larger elements the assignments below appear to extend the string up to 128 characters - confirm this is intended
204 4         15 for(0..$N-1) # Each element
205 30         47 {my $o = $_ * $size; # Offset of the element in characters
206 30 100       65 substr($xmm3, $o, $size) = # Overwrite the element in the result via an lvalue substr
207             substr($xmm1, $o, $size) eq
208             substr($xmm2, $o, $size) ? $set : $clear;
209             }
210             $xmm3 # Return the result
211 4         17 }
212              
213             sub PCMPEQB($$) # Packed CoMPare EQual Byte: compare the 8 bit elements of two 128 bit strings for equality
214 1     1 1 3 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
215 1         5 pcmpeq 8, $xmm1, $xmm2; # pcmpeq sets each equal element to all ones else all zeros
216             }
217              
218             sub PCMPEQW($$) # Packed CoMPare EQual Word: compare the 16 bit elements of two 128 bit strings for equality
219 1     1 1 20 {my ($xmm1, $xmm2) = @_; # Words, words
220 1         12 pcmpeq 16, $xmm1, $xmm2; # pcmpeq sets each equal element to all ones else all zeros
221             }
222              
223             sub PCMPEQD($$) # Packed CoMPare EQual DWord: compare the 32 bit elements of two 128 bit strings for equality
224 1     1 1 3 {my ($xmm1, $xmm2) = @_; # DWords, DWords
225 1         4 pcmpeq 32, $xmm1, $xmm2; # pcmpeq sets each equal element to all ones else all zeros
226             }
227              
228             sub PCMPEQQ($$) # Packed CoMPare EQual QWord: compare the 64 bit elements of two 128 bit strings for equality
229 1     1 1 4 {my ($xmm1, $xmm2) = @_; # QWords, QWords
230 1         3 pcmpeq 64, $xmm1, $xmm2; # pcmpeq sets each equal element to all ones else all zeros
231             }
232              
233             #D2 PCMPGT # Packed CoMPare Greater Than
234              
235             sub pcmpgt($$$) #P Packed CoMPare Greater Than: compare corresponding elements of two 128 bit strings as two's complement numbers and return a 128 bit string in which each element of the first operand that is greater is all ones and every other element is all zeros
236 4     4 1 10 {my ($size, $xmm1, $xmm2) = @_; # Size in bits, element, element
237              
238 4 50       11 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
239 4         10 require128 $xmm1; # Check that we have a string of 128 bits in the first operand
240 4         10 require128 $xmm2; # Check that we have a string of 128 bits in the second operand
241 4         12 requireSameLength $xmm1, $xmm2; # Check operands have the same length
242              
243 4         8 my $N = 128 / $size; # Number of elements in operation
244 4         10 my $clear = '0' x $size; # Element value for a failed comparison
245 4         9 my $set = '1' x $size; # Element value for a successful comparison
246 4         10 my $xmm3 = zBytes $N; # Result - NOTE(review): this is $N*8 characters which is only 128 when $size is 8 so for larger elements the assignments below appear to extend the string up to 128 characters - confirm this is intended
247 4         15 for(0..$N-1) # Each element
248 30         43 {my $o = $_ * $size; # Offset of the element in characters
249 30 100       66 substr($xmm3, $o, $size) = +1 == compareTwosComplement( # Signed compare
250             substr($xmm1, $o, $size),
251             substr($xmm2, $o, $size)) ? $set : $clear;
252             }
253             $xmm3 # Return the result
254 4         19 }
255              
256             sub PCMPGTB($$) # Packed CoMPare Greater Than Byte: signed comparison of the 8 bit elements of two 128 bit strings
257 1     1 1 4 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
258 1         4 pcmpgt 8, $xmm1, $xmm2; # pcmpgt sets each element where the first operand is greater to all ones else all zeros
259             }
260              
261             sub PCMPGTW($$) # Packed CoMPare Greater Than Word: signed comparison of the 16 bit elements of two 128 bit strings
262 1     1 1 4 {my ($xmm1, $xmm2) = @_; # Words, words
263 1         3 pcmpgt 16, $xmm1, $xmm2; # pcmpgt sets each element where the first operand is greater to all ones else all zeros
264             }
265              
266             sub PCMPGTD($$) # Packed CoMPare Greater Than DWord: signed comparison of the 32 bit elements of two 128 bit strings
267 1     1 1 3 {my ($xmm1, $xmm2) = @_; # DWords, DWords
268 1         4 pcmpgt 32, $xmm1, $xmm2; # pcmpgt sets each element where the first operand is greater to all ones else all zeros
269             }
270              
271             sub PCMPGTQ($$) # Packed CoMPare Greater Than QWord: signed comparison of the 64 bit elements of two 128 bit strings
272 1     1 1 4 {my ($xmm1, $xmm2) = @_; # QWords, QWords
273 1         10 pcmpgt 64, $xmm1, $xmm2; # pcmpgt sets each element where the first operand is greater to all ones else all zeros
274             }
275              
276             #D1 VPCMP # Packed CoMPare
277             #D2 VPCMPEQ # Packed CoMPare EQual
278              
279             sub vpcmpeq($$$;$) #P Packed CoMPare EQual Byte|word|double|quad with optional masking: with a mask the result is a 64 character mask string else it is a bit string with all ones for each equal element and all zeros for each unequal element
280 5     5 1 11 {my ($size, $k2, $xmm1, $xmm2) = @_; # Size in bits: 8|16|32|64 of each element, optional input mask, bytes, bytes
281              
282 5 50       13 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
283 5 100       17 require64or128or256or512 $k2 if defined $k2; # Optional mask - NOTE(review): lengths above 64 are accepted here yet the offsets 48 and 32 used below presumably assume a 64 character mask - confirm
284 5         15 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of 128|256|512 bits with the same length
285              
286 5         12 my $N = length($xmm1) / $size; # Bytes|Words|Doubles|Quads in operation
287 5 100       11 if (defined $k2) # Masked operation
288 3         9 {my $k1 = maskRegister; # Result register
289 3 100       12 $k2 = substr($k2, 48) if $N == 16; # Relevant portion of register: the last 16 characters of a 64 character mask
290 3 100       8 $k2 = substr($k2, 32) if $N == 32; # The last 32 characters of a 64 character mask - NOTE(review): element counts other than 16|32|64 leave the mask untrimmed so its leading characters are used - confirm intended
291 3         10 for(0..$N-1) # Each element
292 112 100       185 {next unless substr($k2, $_, 1) eq '1'; # Elements whose mask character is not 1 keep a 0 in the result
293 16         19 my $o = $_ * $size; # Offset of the element in characters
294 16 100       41 substr($k1, $_, 1) = substr($xmm1, $o, $size) eq # One result character per element
295             substr($xmm2, $o, $size) ? '1' : '0';
296             }
297 3 100       9 return zBytes(6).substr($k1, 0, 16) if $N == 16; # 48 zeros followed by the 16 results making 64 characters
298 2 100       7 return zBytes(4).substr($k1, 0, 32) if $N == 32; # 32 zeros followed by the 32 results making 64 characters
299 1         5 return $k1 # Any other element count returns the result register unshifted
300             }
301              
302 2         6 my $xmm3 = zBytes $N; # Non masked operation - NOTE(review): this is $N*8 characters which only matches the operand length when $size is 8 - confirm
303 2         7 my $clear = '0' x $size; # Element value for a failed comparison
304 2         5 my $set = '1' x $size; # Element value for a successful comparison
305 2         6 for(0..$N-1) # Each element
306 48         60 {my $o = $_ * $size; # Offset of the element in characters
307 48 100       92 substr($xmm3, $o, $size) = substr($xmm1, $o, $size) eq
308             substr($xmm2, $o, $size) ? $set : $clear
309             }
310             $xmm3 # Return the result
311 2         10 }
312              
313             sub VPCMPEQB($$;$) # Packed CoMPare EQual Byte with optional masking: called with three arguments the first is the input mask
314 5 100   5 1 24 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, bytes, bytes
315 5         11 vpcmpeq(8, $k2, $xmm1, $xmm2) # Compare 8 bit elements passing an undefined mask when none was supplied
316             }
317              
318             sub VPCMPEQW($$;$) # Packed CoMPare EQual Word with optional masking: called with three arguments the first is the input mask
319 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, words, words
320 0         0 vpcmpeq(16, $k2, $xmm1, $xmm2) # Compare 16 bit elements passing an undefined mask when none was supplied
321             }
322              
323             sub VPCMPEQD($$;$) # Packed CoMPare EQual Dword with optional masking: called with three arguments the first is the input mask
324 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, dwords, dwords
325 0         0 vpcmpeq(32, $k2, $xmm1, $xmm2) # Compare 32 bit elements passing an undefined mask when none was supplied
326             }
327              
328             sub VPCMPEQQ($$;$) # Packed CoMPare EQual Qword with optional masking: called with three arguments the first is the input mask
329 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, qwords, qwords
330 0         0 vpcmpeq(64, $k2, $xmm1, $xmm2) # Compare 64 bit elements passing an undefined mask when none was supplied
331             }
332              
333             #D2 VPCMP # Packed CoMPare
334              
335             sub vpcmp($$$$$) #P Packed CoMPare: compare corresponding elements of two bit strings as two's complement numbers under a mask using the test selected by the op code and return a 64 character mask string with the results in its last characters
336 10     10 1 22 {my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code
337              
338 10 50       25 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
339 10         26 require64 $k2; # Mask
340 10         26 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of 128|256|512 bits with the same length
341 10 50       43 confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code
342              
343             my $T = # Signed tests indexed by op code: each returns 1 when the test holds else 0
344 16 100   16   25 [sub {return 1 if compareTwosComplement($_[0], $_[1]) == 0; 0}, # eq 0
  6         20  
345 8 100   8   16 sub {return 1 if compareTwosComplement($_[0], $_[1]) == -1; 0}, # lt 1
  5         16  
346 8 100   8   13 sub {return 1 if compareTwosComplement($_[0], $_[1]) != +1; 0}, # le 2
  1         5  
347             undef, # Index 3 has no test: the op code check above rejects 3
348 16 100   16   29 sub {return 1 if compareTwosComplement($_[0], $_[1]) != 0; 0}, # ne 4
  10         29  
349 8 100   8   13 sub {return 1 if compareTwosComplement($_[0], $_[1]) != -1; 0}, # ge 5
  3         9  
350 8 100   8   14 sub {return 1 if compareTwosComplement($_[0], $_[1]) == +1; 0}, # gt 6
  7         21  
351 10         79 ];
352              
353 10         28 my $N = length($xmm1) / $size; # Number of elements
354 10         20 my $k1 = maskRegister; # Result register of 64 zeros
355 10         31 $k2 = substr($k2, -$N); # Relevant portion of mask: its last $N characters
356 10         27 for(0..$N-1) # Each element
357 480 100       800 {next unless substr($k2, $_, 1) eq '1'; # Mask
358 64         90 my $o = $_ * $size; # Offset of the element in characters
359 64 100       112 substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code
  64         108  
360             substr($xmm2, $o, $size)) ? '1' : '0';
361             }
362              
363 10         21 substr(zBytes(8).substr($k1, 0, $N), -64) # Prefix 64 zeros to the $N results and keep the last 64 characters so the results are right aligned
364             }
365              
366             sub VPCMPB($$$$) # Packed CoMPare Byte: signed comparison of 8 bit elements under a mask
367 10     10 1 29 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code
368 10         25 vpcmp 8, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmp: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
369             }
370              
371             sub VPCMPW($$$$) # Packed CoMPare Word: signed comparison of 16 bit elements under a mask
372 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code
373 0         0 vpcmp 16, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmp: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
374             }
375              
376             sub VPCMPD($$$$) # Packed CoMPare Dword: signed comparison of 32 bit elements under a mask
377 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code
378 0         0 vpcmp 32, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmp: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
379             }
380              
381             sub VPCMPQ($$$$) # Packed CoMPare Qword: signed comparison of 64 bit elements under a mask
382 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code
383 0         0 vpcmp 64, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmp: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
384             }
385              
386             #D2 VPCMPU # Packed CoMPare Unsigned
387              
388             sub vpcmpu($$$$$) #P Packed CoMPare Unsigned: compare corresponding elements of two bit strings as unsigned numbers under a mask using the test selected by the op code and return a 64 character mask string with the results in its last characters
389 46     46 1 95 {my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code
390              
391 46 50       105 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
392 46         116 require64 $k2; # Mask
393 46         131 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of 128|256|512 bits with the same length
394 46 50       166 confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code
395              
396             my $T = # String tests indexed by op code: elements are the same length so string comparison of the binary digits orders them as unsigned numbers
397 50 100   50   149 [sub {return 1 if $_[0] eq $_[1]; 0}, # eq 0
  32         83  
398 42 100   42   117 sub {return 1 if $_[0] lt $_[1]; 0}, # lt 1
  29         77  
399 42 100   42   144 sub {return 1 if $_[0] le $_[1]; 0}, # le 2
  17         46  
400             undef, # Index 3 has no test: the op code check above rejects 3
401 50 100   50   160 sub {return 1 if $_[0] ne $_[1]; 0}, # ne 4
  18         50  
402 42 100   42   144 sub {return 1 if $_[0] ge $_[1]; 0}, # ge 5
  13         33  
403 42 100   42   128 sub {return 1 if $_[0] gt $_[1]; 0}, # gt 6
  25         69  
404 46         346 ];
405              
406 46         116 my $N = length($xmm1) / $size; # Number of elements
407 46         81 my $k1 = maskRegister; # Result register of 64 zeros
408 46         127 $k2 = substr($k2, -$N); # Relevant portion of mask: its last $N characters
409 46         136 for(0..$N-1) # Each element
410 1152 100       1973 {next unless substr($k2, $_, 1) eq '1'; # Mask
411 268         348 my $o = $_ * $size; # Offset of the element in characters
412 268 100       480 substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code
  268         432  
413             substr($xmm2, $o, $size)) ? '1' : '0';
414             }
415              
416 46         87 substr(zBytes(8).substr($k1, 0, $N), -64) # Prefix 64 zeros to the $N results and keep the last 64 characters so the results are right aligned
417             }
418              
419             sub VPCMPUB($$$$) # Packed CoMPare Unsigned Byte: unsigned comparison of 8 bit elements under a mask
420 10     10 1 27 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code
421 10         23 vpcmpu 8, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmpu: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
422             }
423              
424             sub VPCMPUW($$$$) # Packed CoMPare Unsigned Word: unsigned comparison of 16 bit elements under a mask
425 12     12 1 33 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code
426 12         32 vpcmpu 16, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmpu: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
427             }
428              
429             sub VPCMPUD($$$$) # Packed CoMPare Unsigned Dword: unsigned comparison of 32 bit elements under a mask
430 12     12 1 33 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code
431 12         27 vpcmpu 32, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmpu: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
432             }
433              
434             sub VPCMPUQ($$$$) # Packed CoMPare Unsigned Qword: unsigned comparison of 64 bit elements under a mask
435 12     12 1 76 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code
436 12         31 vpcmpu 64, $k2, $xmm1, $xmm2, $op # Test codes accepted by vpcmpu: 0 eq 1 lt 2 le 4 ne 5 ge 6 gt
437             }
438              
439             #D1 VPTEST # Packed TEST
440             #D2 VPTESTM # Packed TEST MASK
441              
442             sub andAndTest($$) #P And two bit strings of the same length and return 0 if the result is 0 else 1
443 308     308 1 698 {my ($a, $b) = @_; # Element, element
444 308         514 my $N = requireSameLength $a, $b; # Check that the two elements have the same length
445 308         529 for(0..$N-1) # Look for match
446 4113 100 100     7651 {return 1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '1'; # Stop at the first position where both strings have a 1
447             }
448             0 # No position has a 1 in both strings
449 248         602 }
450              
451             sub vptest($$$) #P Packed TEST: and corresponding elements of two bit strings and return a 64 character mask string whose last characters hold a 1 for each element with a non zero result else 0
452 31     31 1 57 {my ($size, $xmm1, $xmm2) = @_; # Size of element in bits, element, element
453              
454 31 50       65 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
455 31         71 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of 128|256|512 bits with the same length
456              
457 31         56 my $N = length($xmm1) / $size; # Number of elements
458 31         59 my $k1 = maskRegister; # Result register of 64 zeros
459 31         84 for(0..$N-1) # Each element
460 308         472 {my $o = $_ * $size; # Offset of the element in characters
461 308 100       609 substr($k1, $_, 1) = andAndTest(substr($xmm1, $o, $size), # Test two elements
462             substr($xmm2, $o, $size)) ? '1' : '0';
463             }
464              
465 31         63 substr(zBytes(8).substr($k1, 0, $N), -64) # Prefix 64 zeros to the $N results and keep the last 64 characters so the results are right aligned
466             }
467              
468             sub VPTESTMB($$) # Packed TEST Mask Byte: test the 8 bit elements of two bit strings
469 10     10 1 20 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
470 10         20 vptest 8, $xmm1, $xmm2 # vptest records a 1 for each element pair that has a 1 in a common position
471             }
472              
473             sub VPTESTMW($$) # Packed TEST Mask Word: test the 16 bit elements of two bit strings
474 10     10 1 20 {my ($xmm1, $xmm2) = @_; # Words, words
475 10         20 vptest 16, $xmm1, $xmm2 # vptest records a 1 for each element pair that has a 1 in a common position
476             }
477              
478             sub VPTESTMD($$) # Packed TEST Mask Dword: test the 32 bit elements of two bit strings
479 10     10 1 21 {my ($xmm1, $xmm2) = @_; # Dwords, dwords
480 10         21 vptest 32, $xmm1, $xmm2 # vptest records a 1 for each element pair that has a 1 in a common position
481             }
482              
483             sub VPTESTMQ($$) # Packed TEST Mask Quad: test the 64 bit elements of two bit strings
484 1     1 1 3 {my ($xmm1, $xmm2) = @_; # Quads, quads
485 1         3 vptest 64, $xmm1, $xmm2 # vptest records a 1 for each element pair that has a 1 in a common position
486             }
487              
488             #D0
489             #-------------------------------------------------------------------------------
490             # Export
491             #-------------------------------------------------------------------------------
492              
493 1     1   10 use Exporter qw(import); # Take the import method from Exporter
  1         2  
  1         65  
494              
495 1     1   7 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); # Declare the package variables that Exporter reads
  1         2  
  1         510  
496              
497             @ISA = qw(Exporter); # Also inherit from Exporter
498             @EXPORT_OK = qw(
499             ); # NOTE(review): the list of exportable names is empty here so none of the subs above can be imported by name - confirm this is intended
500             %EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]); # The :all tag is the union of both export lists which is empty as written
501              
502             # podDocumentation
503              
504             =pod
505              
506             =encoding utf-8
507              
508             =head1 Name
509              
510             Simd::Avx512 - Emulate SIMD instructions
511              
512             =head1 Synopsis
513              
514             Help needed please!
515              
516             =head1 Description
517              
518             Emulate SIMD instructions
519              
520              
521             Version 20210128.
522              
523              
524             The following sections describe the methods in each functional area of this
525             module. For an alphabetic listing of all methods by name see L<Index|/Index>.
526              
527              
528              
529             =head1 Instructions
530              
531             Emulation of Avx512 instructions
532              
533             =head2 PSLLDQ($xmm1, $imm8)
534              
535             Packed Shift Left Logical DoubleQword
536              
537             Parameter Description
538             1 $xmm1 Bytes
539             2 $imm8 Length of shift in bytes
540              
541             B<Example:>
542              
543              
544              
545             is_deeply PSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
546              
547             #Q0 1 2 3 4 5 6 7 8
548             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
549             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
550             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
551             #b
552             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
553             ,2),
554             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
555              
556              
557             =head2 VPSLLDQ($xmm1, $imm8)
558              
559             Packed Shift Left Logical DoubleQword
560              
561             Parameter Description
562             1 $xmm1 Bytes
563             2 $imm8 Length of shift in bytes
564              
565             B<Example:>
566              
567              
568              
569             is_deeply VPSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
570              
571             #Q0 1 2 3 4 5 6 7 8
572             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
573             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
574             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
575             #b
576             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
577             ,2),
578             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
579              
580              
581             is_deeply VPSLLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
582              
583             #Q0 1 2 3 4 5 6 7 8
584             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
585             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
586             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
587             #b
588             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
589             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
590             ,2),
591             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
592             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
593              
594              
595             is_deeply VPSLLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
596              
597             #Q0 1 2 3 4 5 6 7 8
598             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
599             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
600             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
601             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
602             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
603             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
604             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
605             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
606             ,2),
607             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
608             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
609             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
610             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
611              
612              
613             =head2 PSRLDQ($xmm1, $imm8)
614              
615             Packed Shift Right Logical DoubleQword
616              
617             Parameter Description
618             1 $xmm1 Bytes
619             2 $imm8 Length of shift
620              
621             B<Example:>
622              
623              
624              
625             is_deeply PSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
626              
627             #Q0 1 2 3 4 5 6 7 8
628             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
629             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
630             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
631             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
632             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
633             ,2),
634             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
635              
636              
637             =head2 VPSRLDQ($xmm1, $imm8)
638              
639             Packed Shift Right Logical DoubleQword
640              
641             Parameter Description
642             1 $xmm1 Bytes
643             2 $imm8 Length of shift
644              
645             B<Example:>
646              
647              
648              
649             is_deeply VPSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
650              
651             #Q0 1 2 3 4 5 6 7 8
652             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
653             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
654             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
655             #b
656             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
657             ,2),
658             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
659              
660              
661             is_deeply VPSRLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
662              
663             #Q0 1 2 3 4 5 6 7 8
664             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
665             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
666             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
667             #b
668             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
669             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
670             ,2),
671             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
672             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
673              
674              
675             is_deeply VPSRLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
676              
677             #Q0 1 2 3 4 5 6 7 8
678             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
679             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
680             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
681             #b
682             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
683             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
684             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
685             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
686             ,2),
687             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
688             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
689             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
690             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
691              
692              
693             =head1 PCMP
694              
695             Packed CoMPare
696              
697             =head2 PCMPEQ
698              
699             Packed CoMPare EQual
700              
701             =head3 PCMPEQB($xmm1, $xmm2)
702              
703             Packed CoMPare EQual Byte
704              
705             Parameter Description
706             1 $xmm1 Bytes
707             2 $xmm2 Bytes
708              
709             B<Example:>
710              
711              
712              
713             is_deeply PCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
714              
715             #Q0 1 2 3 4 5 6 7 8
716             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
717             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
718             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
719             #b
720             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
721             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
722             ),
723             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
724              
725              
726             =head3 PCMPEQW($xmm1, $xmm2)
727              
728             Packed CoMPare EQual Word
729              
730             Parameter Description
731             1 $xmm1 Words
732             2 $xmm2 Words
733              
734             B<Example:>
735              
736              
737              
738             is_deeply PCMPEQW( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
739              
740             #Q0 1 2 3 4 5 6 7 8
741             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
742             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
743             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
744             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
745             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
746             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
747             ),
748             '00000000000000001111111111111111000000000000000000000000000000001111111111111111111111111111111111111111111111110000000000000000'
749              
750              
751             =head3 PCMPEQD($xmm1, $xmm2)
752              
753             Packed CoMPare EQual DWord
754              
755             Parameter Description
756             1 $xmm1 DWords
757             2 $xmm2 DWords
758              
759             B<Example:>
760              
761              
762              
763             is_deeply PCMPEQD( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
764              
765             #Q0 1 2 3 4 5 6 7 8
766             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
767             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
768             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
769             #b
770             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
771             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
772             ),
773             '00000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111100000000000000000000000000000000'
774              
775              
776             =head3 PCMPEQQ($xmm1, $xmm2)
777              
778             Packed CoMPare EQual QWord
779              
780             Parameter Description
781             1 $xmm1 QWords
782             2 $xmm2 QWords
783              
784             B<Example:>
785              
786              
787              
788             is_deeply PCMPEQQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
789              
790             #Q0 1 2 3 4 5 6 7 8
791             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
792             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
793             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
794             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
795             '11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011110'
796             ,'11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011010'
797             ),
798             '11111111111111111111111111111111111111111111111111111111111111110000000000000000000000000000000000000000000000000000000000000000'
799              
800              
801             =head2 PCMPGT
802              
803             Packed CoMPare Greater Than
804              
805             =head3 PCMPGTB($xmm1, $xmm2)
806              
807             Packed CoMPare Greater Than Byte
808              
809             Parameter Description
810             1 $xmm1 Bytes
811             2 $xmm2 Bytes
812              
813             B<Example:>
814              
815              
816              
817             is_deeply PCMPGTB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
818              
819             #Q0 1 2 3 4 5 6 7 8
820             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
821             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
822             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
823             #b
824             '11100001000010000000000011000000010000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
825             ,'11100001000001000000000001000000100000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
826             ),
827             '00000000111111110000000000000000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000011111111'
828              
829              
830             =head3 PCMPGTW($xmm1, $xmm2)
831              
832             Packed CoMPare Greater Than Word
833              
834             Parameter Description
835             1 $xmm1 Words
836             2 $xmm2 Words
837              
838             B<Example:>
839              
840              
841              
842             is_deeply PCMPGTW( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
843              
844             #Q0 1 2 3 4 5 6 7 8
845             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
846             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
847             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
848             #b
849             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
850             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000000111110000000000000000000000000000000011010'
851             ),
852             '11111111111111110000000000000000111111111111111100000000000000000000000000000000111111111111111100000000000000001111111111111111'
853              
854              
855             =head3 PCMPGTD($xmm1, $xmm2)
856              
857             Packed CoMPare Greater Than DWord
858              
859             Parameter Description
860             1 $xmm1 DWords
861             2 $xmm2 DWords
862              
863             B<Example:>
864              
865              
866              
867             is_deeply PCMPGTD( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
868              
869             #Q0 1 2 3 4 5 6 7 8
870             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
871             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
872             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
873             #b
874             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
875             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
876             ),
877             '11111111111111111111111111111111111111111111111111111111111111110000000000000000000000000000000011111111111111111111111111111111'
878              
879              
880             =head3 PCMPGTQ($xmm1, $xmm2)
881              
882             Packed CoMPare Greater Than QWord
883              
884             Parameter Description
885             1 $xmm1 QWords
886             2 $xmm2 QWords
887              
888             B<Example:>
889              
890              
891              
892             is_deeply PCMPGTQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
893              
894             #Q0 1 2 3 4 5 6 7 8
895             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
896             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
897             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
898             #b
899             '11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011110'
900             ,'11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011010'
901             ),
902             '00000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111'
903              
904              
905             =head1 VPCMP
906              
907             Packed CoMPare
908              
909             =head2 VPCMPEQ
910              
911             Packed CoMPare EQual
912              
913             =head3 VPCMPEQB($k2, $xmm1, $xmm2)
914              
915             Packed CoMPare EQual Byte with optional masking
916              
917             Parameter Description
918             1 $k2 Optional input mask
919             2 $xmm1 Bytes
920             3 $xmm2 Bytes
921              
922             B<Example:>
923              
924              
925              
926             is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
927              
928             #Q0 1 2 3 4 5 6 7 8
929             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
930             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
931             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
932             #b
933             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
934             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
935             ),
936             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
937              
938              
939             is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
940              
941             #Q0 1 2 3 4 5 6 7 8
942             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
943             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
944             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
945             #b
946             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
947             .'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
948             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
949             .'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
950             ),
951             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
952             .'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
953              
954              
955             is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
956              
957             #Q0 1 2 3 4 5 6 7 8
958             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
959             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
960             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
961             #b
962             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
963             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
964             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
965             ),
966             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
967              
968              
969             is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
970              
971             #Q0 1 2 3 4 5 6 7 8
972             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
973             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
974             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
975             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
976             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
977             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
978             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
979             ),
980             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
981              
982              
983             is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
984              
985             #Q0 1 2 3 4 5 6 7 8
986             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
987             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
988             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
989             #b
990             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
991            
992            
993             ),
994             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
995              
996              
997             =head3 VPCMPEQW($k2, $xmm1, $xmm2)
998              
999             Packed CoMPare EQual Word with optional masking
1000              
1001             Parameter Description
1002             1 $k2 Optional input mask
1003             2 $xmm1 Words
1004             3 $xmm2 Words
1005              
1006             =head3 VPCMPEQD($k2, $xmm1, $xmm2)
1007              
1008             Packed CoMPare EQual DWord with optional masking
1009              
1010             Parameter Description
1011             1 $k2 Optional input mask
1012             2 $xmm1 Dwords
1013             3 $xmm2 Dwords
1014              
1015             =head3 VPCMPEQQ($k2, $xmm1, $xmm2)
1016              
1017             Packed CoMPare EQual QWord with optional masking
1018              
1019             Parameter Description
1020             1 $k2 Optional input mask
1021             2 $xmm1 Qwords
1022             3 $xmm2 Qwords
1023              
1024             =head2 VPCMP
1025              
1026             Packed CoMPare
1027              
1028             =head3 VPCMPB($k2, $xmm1, $xmm2, $op)
1029              
1030             Packed CoMPare Byte
1031              
1032             Parameter Description
1033             1 $k2 Input mask
1034             2 $xmm1 Bytes
1035             3 $xmm2 Bytes
1036             4 $op Test code
1037              
1038             B<Example:>
1039              
1040              
1041             my ($mi, $mo, $o1, $o2) = ( # 128
1042             #Q0 1 2 3 4 5 6 7 8
1043             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1044             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1045             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1046             #b
1047             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1048             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1049             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1050             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1051             );
1052              
1053              
1054             is_deeply VPCMPB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1055              
1056              
1057             is_deeply VPCMPB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1058              
1059              
1060             my ($mi, $mo, $o1, $o2) = ( # 256
1061             #Q0 1 2 3 4 5 6 7 8
1062             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1063             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1064             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1065             #b
1066             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1067             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1068             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1069             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1070             );
1071              
1072             is_deeply VPCMPB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1073              
1074              
1075             is_deeply VPCMPB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1076              
1077              
1078             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1079             #Q0 1 2 3 4 5 6 7 8
1080             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1081             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1082             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1083             #b
1084             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1085             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1086             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1087             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1088            
1089            
1090             );
1091              
1092             is_deeply VPCMPB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1093              
1094              
1095             is_deeply VPCMPB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1096              
1097              
1098             is_deeply VPCMPB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1099              
1100              
1101             is_deeply VPCMPB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1102              
1103              
1104             is_deeply VPCMPB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1105              
1106              
1107             is_deeply VPCMPB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1108              
1109              
1110              
1111             =head3 VPCMPW($k2, $xmm1, $xmm2, $op)
1112              
1113             Packed CoMPare Word
1114              
1115             Parameter Description
1116             1 $k2 Input mask
1117             2 $xmm1 Words
1118             3 $xmm2 Words
1119             4 $op Test code
1120              
1121             =head3 VPCMPD($k2, $xmm1, $xmm2, $op)
1122              
1123             Packed CoMPare Dword
1124              
1125             Parameter Description
1126             1 $k2 Input mask
1127             2 $xmm1 Dwords
1128             3 $xmm2 Dwords
1129             4 $op Test code
1130              
1131             =head3 VPCMPQ($k2, $xmm1, $xmm2, $op)
1132              
1133             Packed CoMPare Qword
1134              
1135             Parameter Description
1136             1 $k2 Input mask
1137             2 $xmm1 Qwords
1138             3 $xmm2 Qwords
1139             4 $op Test code
1140              
1141             =head2 VPCMPU
1142              
1143             Packed CoMPare Unsigned
1144              
1145             =head3 VPCMPUB($k2, $xmm1, $xmm2, $op)
1146              
1147             Packed CoMPare Unsigned Byte
1148              
1149             Parameter Description
1150             1 $k2 Input mask
1151             2 $xmm1 Bytes
1152             3 $xmm2 Bytes
1153             4 $op Test code
1154              
1155             B<Example:>
1156              
1157              
1158             my ($mi, $mo, $o1, $o2) = ( # 128
1159             #Q0 1 2 3 4 5 6 7 8
1160             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1161             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1162             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1163             #b
1164             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1165             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1166             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1167             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1168             );
1169              
1170              
1171             is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1172              
1173              
1174             is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1175              
1176              
1177             my ($mi, $mo, $o1, $o2) = ( # 256
1178             #Q0 1 2 3 4 5 6 7 8
1179             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1180             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1181             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1182             #b
1183             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1184             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1185             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1186             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1187             );
1188              
1189             is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1190              
1191              
1192             is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1193              
1194              
1195             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1196             #Q0 1 2 3 4 5 6 7 8
1197             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1198             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1199             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1200             #b
1201             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1202             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1203             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1204             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1205            
1206            
1207             );
1208              
1209             is_deeply VPCMPUB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1210              
1211              
1212             is_deeply VPCMPUB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1213              
1214              
1215             is_deeply VPCMPUB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1216              
1217              
1218             is_deeply VPCMPUB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1219              
1220              
1221             is_deeply VPCMPUB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1222              
1223              
1224             is_deeply VPCMPUB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1225              
1226              
1227              
1228             =head3 VPCMPUW($k2, $xmm1, $xmm2, $op)
1229              
1230             Packed CoMPare Unsigned Word
1231              
1232             Parameter Description
1233             1 $k2 Input mask
1234             2 $xmm1 Words
1235             3 $xmm2 Words
1236             4 $op Test code
1237              
1238             B<Example:>
1239              
1240              
1241             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1242             #Q0 1 2 3 4 5 6 7 8
1243             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1244             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1245             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1246             #b
1247             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1248             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1249             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1250             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1251            
1252            
1253             );
1254             for my $i(\($mi, $meq, $mlt, $mgt))
1255             {$$i = zBytes(4).$$i;
1256             }
1257              
1258             is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1259              
1260              
1261             is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1262              
1263              
1264             is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1265              
1266              
1267             is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1268              
1269              
1270             is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1271              
1272              
1273             is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1274              
1275              
1276             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1277             #Q0 1 2 3 4 5 6 7 8
1278             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1279             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1280             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1281             #b
1282             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1283             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1284             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1285             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1286            
1287            
1288             );
1289             for my $i(\($mi, $meq, $mlt, $mgt))
1290             {$$i = zBytes(4).$$i;
1291             }
1292              
1293             is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1294              
1295              
1296             is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1297              
1298              
1299             is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1300              
1301              
1302             is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1303              
1304              
1305             is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1306              
1307              
1308             is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1309              
1310              
1311              
1312             =head3 VPCMPUD($k2, $xmm1, $xmm2, $op)
1313              
1314             Packed CoMPare Unsigned Dword
1315              
1316             Parameter Description
1317             1 $k2 Input mask
1318             2 $xmm1 Dwords
1319             3 $xmm2 Dwords
1320             4 $op Test code
1321              
1322             B<Example:>
1323              
1324              
1325             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1326             #Q0 1 2 3 4 5 6 7 8
1327             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1328             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1329             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1330             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
1331             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1332             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1333             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1334             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1335            
1336            
1337             );
1338             for my $i(\($mi, $meq, $mlt, $mgt))
1339             {$$i = zBytes(6).$$i;
1340             }
1341              
1342             is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1343              
1344              
1345             is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1346              
1347              
1348             is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1349              
1350              
1351             is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1352              
1353              
1354             is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1355              
1356              
1357             is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1358              
1359              
1360             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1361             #Q0 1 2 3 4 5 6 7 8
1362             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1363             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1364             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1365             #b
1366             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1367             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1368             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1369             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1370            
1371            
1372             );
1373             for my $i(\($mi, $meq, $mlt, $mgt))
1374             {$$i = zBytes(6).$$i;
1375             }
1376              
1377             is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1378              
1379              
1380             is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1381              
1382              
1383             is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1384              
1385              
1386             is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1387              
1388              
1389             is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1390              
1391              
1392             is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1393              
1394              
1395              
1396             =head3 VPCMPUQ($k2, $xmm1, $xmm2, $op)
1397              
1398             Packed CoMPare Unsigned Qword
1399              
1400             Parameter Description
1401             1 $k2 Input mask
1402             2 $xmm1 Qwords
1403             3 $xmm2 Qwords
1404             4 $op Test code
1405              
1406             B<Example:>
1407              
1408              
1409             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1410             #Q0 1 2 3 4 5 6 7 8
1411             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1412             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1413             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1414             #b
1415             '1'. '1'. '1'. '1'. '1'. '0'. '0'. '1',
1416             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0',
1417             '1'. '0'. '1'. '1'. '0'. '0'. '0'. '0',
1418             '0'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1419             '00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000',
1420            
1421             );
1422             for my $i(\($mi, $meq, $mlt, $mgt))
1423             {$$i = zBytes(7).$$i;
1424             }
1425              
1426             is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1427              
1428              
1429             is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1430              
1431              
1432             is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1433              
1434              
1435             is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1436              
1437              
1438             is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1439              
1440              
1441             is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1442              
1443              
1444             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1445             #Q0 1 2 3 4 5 6 7 8
1446             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1447             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1448             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1449             #b
1450             '1'. '1'. '1'. '1'. '1'. '0'. '0'. '1',
1451             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0',
1452             '1'. '0'. '1'. '1'. '0'. '0'. '0'. '0',
1453             '0'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1454            
1455            
1456             );
1457             for my $i(\($mi, $meq, $mlt, $mgt))
1458             {$$i = zBytes(7).$$i;
1459             }
1460              
1461             is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1462              
1463              
1464             is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1465              
1466              
1467             is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1468              
1469              
1470             is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1471              
1472              
1473             is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1474              
1475              
1476             is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1477              
1478              
1479              
1480             =head1 VPTEST
1481              
1482             Packed TEST
1483              
1484             =head2 VPTESTM
1485              
1486             Packed TEST MASK
1487              
1488             =head3 VPTESTMB($xmm1, $xmm2)
1489              
1490             Packed TEST Mask Byte
1491              
1492             Parameter Description
1493             1 $xmm1 Bytes
1494             2 $xmm2 Bytes
1495              
1496             B<Example:>
1497              
1498              
1499             my ($o1, $o2, $k1) = ( # 128
1500             #Q0 1 2
1501             #D0 1 2 3 4
1502             #W0 1 2 3 4 5 6 7 0
1503             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
1504             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
1505             '00000001000010000000000011000000000100000000001000010010000000000001100000000000000010100000101000011000000000111111111100010000',
1506             '10000001000010000000100011001000000001000001000000001100000001000000000000010010000101000001010000000110000111000000000000010000',
1507             '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1508             );
1509              
1510             is_deeply VPTESTMB($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1511              
1512              
1513              
1514             =head3 VPTESTMW($xmm1, $xmm2)
1515              
1516             Packed TEST Mask Word
1517              
1518             Parameter Description
1519             1 $xmm1 Words
1520             2 $xmm2 Words
1521              
1522             B<Example:>
1523              
1524              
1525             my ($o1, $o2, $k1) = ( # 256
1526             #Q0 1 2 3 4 5 6 7 8
1527             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1528             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1529             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1530             #b
1531             '0000110100001000000000001100000000010000000000000001001000000000000000000000000000000000110000000000000000000000000011000001000100000001000010000000000011000000000100000000000000010010000000000000010000000000000000000000000000000000000000000000000000010000',
1532             '0000000101001000000000001100000000010000110000000001001000000000000000000000000000000011000000000000000110000000000000010001000100000001100010000000000011000000000100000010000000010010001000000000000000010000000000000100000000000001100000000000000000010000',
1533             '1'. '1'. '1'. '1'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '1',
1534             );
1535              
1536              
1537             is_deeply VPTESTMW($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1538              
1539              
1540              
1541             =head3 VPTESTMD($xmm1, $xmm2)
1542              
1543             Packed TEST Mask Dword
1544              
1545             Parameter Description
1546             1 $xmm1 Dwords
1547             2 $xmm2 Dwords
1548              
1549             B<Example:>
1550              
1551              
1552             my ($o1, $o2, $k1) = ( # 512
1553             #Q0 1 2 3 4 5 6 7 8
1554             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1555             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1556             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1557             #b
1558            
1559            
1560             '1'. '1'. '0'. '1'. '1'. '1'. '0'. '1'. '1'. '0'. '1'. '1'. '0'. '0'. '0'. '1',
1561             );
1562              
1563              
1564             is_deeply VPTESTMD($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1565              
1566              
1567              
1568             =head3 VPTESTMQ($xmm1, $xmm2)
1569              
1570             Packed TEST Mask Quad
1571              
1572             Parameter Description
1573             1 $xmm1 Quads
1574             2 $xmm2 Quads
1575              
1576             B<Example:>
1577              
1578              
1579             my ($o1, $o2, $k1) = ( # 512
1580             #Q0 1 2 3 4 5 6 7 8
1581             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1582             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1583             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1584             #b
1585            
1586            
1587             '1'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1588             );
1589              
1590              
1591             is_deeply VPTESTMQ($o1, $o2), zBytes(7).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1592              
1593              
1594              
1595              
1596             =head1 Private Methods
1597              
1598             =head2 pcmpeq($size, $xmm1, $xmm2)
1599              
1600             Packed CoMPare EQual
1601              
1602             Parameter Description
1603             1 $size Size in bits
1604             2 $xmm1 Element
1605             3 $xmm2 Element
1606              
1607             =head2 pcmpgt($size, $xmm1, $xmm2)
1608              
1609             Packed CoMPare Greater Than
1610              
1611             Parameter Description
1612             1 $size Size in bits
1613             2 $xmm1 Element
1614             3 $xmm2 Element
1615              
1616             =head2 vpcmpeq($size, $k2, $xmm1, $xmm2)
1617              
1618             Packed CoMPare EQual Byte|word|double|quad with optional masking
1619              
1620             Parameter Description
1621             1 $size Size in bits: 8|16|32|64 of each element
1622             2 $k2 Optional input mask
1623             3 $xmm1 Bytes
1624             4 $xmm2 Bytes
1625              
1626             =head2 vpcmp($size, $k2, $xmm1, $xmm2, $op)
1627              
1628             Packed CoMPare
1629              
1630             Parameter Description
1631             1 $size Size of element in bits
1632             2 $k2 Input mask
1633             3 $xmm1 Bytes
1634             4 $xmm2 Bytes
1635             5 $op Test code
1636              
1637             =head2 vpcmpu($size, $k2, $xmm1, $xmm2, $op)
1638              
1639             Packed CoMPare Unsigned
1640              
1641             Parameter Description
1642             1 $size Size of element in bits
1643             2 $k2 Input mask
1644             3 $xmm1 Bytes
1645             4 $xmm2 Bytes
1646             5 $op Test code
1647              
1648             =head2 andAndTest($a, $b)
1649              
1650             And two bit strings of the same length and return 0 if the result is 0 else 1
1651              
1652             Parameter Description
1653             1 $a Element
1654             2 $b Element
1655              
1656             =head2 vptest($size, $xmm1, $xmm2)
1657              
1658             Packed TEST
1659              
1660             Parameter Description
1661             1 $size Size of element in bits
1662             2 $xmm1 Element
1663             3 $xmm2 Element
1664              
1665              
1666             =head1 Index
1667              
1668              
1669             1 L<andAndTest|/andAndTest> - And two bit strings of the same length and return 0 if the result is 0 else 1
1670              
1671             2 L<pcmpeq|/pcmpeq> - Packed CoMPare EQual
1672              
1673             3 L<PCMPEQB|/PCMPEQB> - Packed CoMPare EQual Byte
1674              
1675             4 L<PCMPEQD|/PCMPEQD> - Packed CoMPare EQual DWord
1676              
1677             5 L<PCMPEQQ|/PCMPEQQ> - Packed CoMPare EQual QWord
1678              
1679             6 L<PCMPEQW|/PCMPEQW> - Packed CoMPare EQual Word
1680              
1681             7 L<pcmpgt|/pcmpgt> - Packed CoMPare Greater Than
1682              
1683             8 L<PCMPGTB|/PCMPGTB> - Packed CoMPare Greater Than Byte
1684              
1685             9 L<PCMPGTD|/PCMPGTD> - Packed CoMPare Greater Than DWord
1686              
1687             10 L<PCMPGTQ|/PCMPGTQ> - Packed CoMPare Greater Than QWord
1688              
1689             11 L<PCMPGTW|/PCMPGTW> - Packed CoMPare Greater Than Word
1690              
1691             12 L<PSLLDQ|/PSLLDQ> - Packed Shift Left Logical DoubleQword
1692              
1693             13 L<PSRLDQ|/PSRLDQ> - Packed Shift Right Logical DoubleQword
1694              
1695             14 L<vpcmp|/vpcmp> - Packed CoMPare
1696              
1697             15 L<VPCMPB|/VPCMPB> - Packed CoMPare Byte
1698              
1699             16 L<VPCMPD|/VPCMPD> - Packed CoMPare Dword
1700              
1701             17 L<vpcmpeq|/vpcmpeq> - Packed CoMPare EQual Byte|word|double|quad with optional masking
1702              
1703             18 L<VPCMPEQB|/VPCMPEQB> - Packed CoMPare EQual Byte with optional masking
1704              
1705             19 L<VPCMPEQD|/VPCMPEQD> - Packed CoMPare EQual DWord with optional masking
1706              
1707             20 L<VPCMPEQQ|/VPCMPEQQ> - Packed CoMPare EQual QWord with optional masking
1708              
1709             21 L<VPCMPEQW|/VPCMPEQW> - Packed CoMPare EQual Word with optional masking
1710              
1711             22 L<VPCMPQ|/VPCMPQ> - Packed CoMPare Qword
1712              
1713             23 L<vpcmpu|/vpcmpu> - Packed CoMPare Unsigned
1714              
1715             24 L<VPCMPUB|/VPCMPUB> - Packed CoMPare Unsigned Byte
1716              
1717             25 L<VPCMPUD|/VPCMPUD> - Packed CoMPare Unsigned Dword
1718              
1719             26 L<VPCMPUQ|/VPCMPUQ> - Packed CoMPare Unsigned Qword
1720              
1721             27 L<VPCMPUW|/VPCMPUW> - Packed CoMPare Unsigned Word
1722              
1723             28 L<VPCMPW|/VPCMPW> - Packed CoMPare Word
1724              
1725             29 L<VPSLLDQ|/VPSLLDQ> - Packed Shift Left Logical DoubleQword
1726              
1727             30 L<VPSRLDQ|/VPSRLDQ> - Packed Shift Right Logical DoubleQword
1728              
1729             31 L<vptest|/vptest> - Packed TEST
1730              
1731             32 L<VPTESTMB|/VPTESTMB> - Packed TEST Mask Byte
1732              
1733             33 L<VPTESTMD|/VPTESTMD> - Packed TEST Mask Dword
1734              
1735             34 L<VPTESTMQ|/VPTESTMQ> - Packed TEST Mask Quad
1736              
1737             35 L<VPTESTMW|/VPTESTMW> - Packed TEST Mask Word
1738              
1739             =head1 Installation
1740              
1741             This module is written in 100% Pure Perl and, thus, it is easy to read,
1742             comprehend, use, modify and install via B<cpan>:
1743              
1744             sudo cpan install Simd::Avx512
1745              
1746             =head1 Author
1747              
1748             L
1749              
1750             L<http://www.appaapps.com|http://www.appaapps.com>
1751              
1752             =head1 Copyright
1753              
1754             Copyright (c) 2016-2021 Philip R Brenan.
1755              
1756             This module is free software. It may be used, redistributed and/or modified
1757             under the same terms as Perl itself.
1758              
1759             =cut
1760              
1761              
1762              
1763             # Tests and documentation
1764              
1765             sub test  # Read the text held in this package's DATA section and evaluate it as Perl; returns 1 on success, dies on any evaluation error
1766 1     1 0 7 {my $p = __PACKAGE__;  # Name of the current package, used to locate its DATA file handle
1767 1         9 binmode($_, ":utf8") for *STDOUT, *STDERR;  # Put both output streams into utf8 mode
1768 1 50       72 return if eval "eof(${p}::DATA)";  # Nothing to run if the DATA handle is already at end of file
1769 1         58 my $s = eval "join('', <${p}::DATA>)";  # Read everything remaining on the DATA handle into one string
1770 1 50       49 $@ and die $@;  # Re-raise any error from reading the DATA handle
1771 1 100   1 0 8 eval $s;  # Execute the text read from the DATA handle
  1 100   1   17  
  1 100   1   51  
  1 100   1   5  
  1     9   2  
  1         30  
  1         545  
  1         1275  
  1         4  
  1         845  
  1         66248  
  1         9  
  1         264  
  9         2665  
  9         13  
  9         15  
  9         21  
  9         28  
  9         24  
  9         28  
  9         26  
  9         25  
  9         54  
  9         40  
1772 1 50       1019 $@ and die $@;  # Re-raise any error raised while executing that text
1773 1         152 1  # Return true once the evaluated text has run without raising an error
1774             }
1775              
1776             test unless caller;  # Run test() only when there is no caller, i.e. the file is executed directly rather than loaded by other code
1777              
1778             1;
1779             # podDocumentation
1780             __DATA__