File Coverage

blib/lib/Simd/Avx512.pm
Criterion Covered Total %
statement 361 399 90.4
branch 122 168 72.6
condition 35 81 43.2
subroutine 87 105 82.8
pod 51 82 62.2
total 656 835 78.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/
2             #-------------------------------------------------------------------------------
3             # Simd::Avx512 - Emulate SIMD Avx512 instructions
4             # Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             package Simd::Avx512;
8             our $VERSION = 20210130;
9 1     1   2182 use warnings FATAL => qw(all);
  1         10  
  1         38  
10 1     1   6 use strict;
  1         2  
  1         30  
11 1     1   6 use Carp;
  1         3  
  1         91  
12 1     1   636 use Data::Dump qw(dump);
  1         8070  
  1         65  
13 1     1   8 use feature qw(say current_sub);
  1         2  
  1         7030  
14              
15             my $develop = -e q(/home/phil/); # Development mode
16              
17             sub repeat($$) # Repeat a string
18 412     412 0 702 {my ($string, $repeat) = @_; # String to repeat, number of repetitions
19 412         2882 $string x $repeat
20             }
21              
22 159     159 0 253 sub zByte {repeat('0', 8)} # Zero byte
23 0     0 0 0 sub zWord {repeat('0', 16)} # Zero word
24 0     0 0 0 sub zDWord {repeat('0', 32)} # Zero double word
25 90     90 0 208 sub zQWord {repeat('0', 64)} # Zero quad word
26              
27             sub zBytes($) # String of zero bytes of specified length
28 159     159 0 271 {my ($length) = @_; # Length
29 159         291 repeat(zByte, $length)
30             }
31              
32             sub zWords($) # String of zero words of specified length
33 0     0 0 0 {my ($length) = @_; # Length
34 0         0 repeat(zWord, $length)
35             }
36              
37             sub zDWords($) # String of zero double words of specified length
38 0     0 0 0 {my ($length) = @_; # Length
39 0         0 repeat(zDWord, $length)
40             }
41              
42             sub zQWords($) # String of zero quad words of specified length
43 0     0 0 0 {my ($length) = @_; # Length
44 0         0 repeat(zQWord, $length)
45             }
46              
47             sub byte($) # A byte with the specified value
48 0     0 0 0 {my ($value) = @_; # Value of the byte
49 0 0 0     0 confess "0 - 2**8 required ($value)" unless $value >= 0 and $value < 2**8;
50 0         0 sprintf("%08b", $value)
51             }
52              
53             sub word($) # A word with the specified value
54 0     0 0 0 {my ($value) = @_; # Value of the word
55 0 0 0     0 confess "0 - 2**16 required ($value)" unless $value >= 0 and $value < 2**16;
56 0         0 sprintf("%016b", $value)
57             }
58              
59             sub dWord($) # A double word with the specified value
60 0     0 0 0 {my ($value) = @_; # Value of the double word
61 0 0 0     0 confess "0 - 2**32 required ($value)" unless $value >= 0 and $value < 2**32;
62 0         0 sprintf("%032b", $value)
63             }
64              
65             sub qWord($) # A quad word with the specified value
66 0     0 0 0 {my ($value) = @_; # Value of the quad word
67 0 0 0     0 confess "0 - 2**64 required ($value)" unless $value >= 0 and $value < 2**64;
68 0         0 sprintf("%064b", $value)
69             }
70              
71 90     90 0 196 sub maskRegister {zQWord} # Mask register set to zero
72              
73             sub require8or16or32or64($) # Check that we have a size of 8|16|32|64 bits
74 0     0 0 0 {my ($size) = @_; # Size to check
75 0 0 0     0 confess "8|16|32|64 required for operand ($size)" unless $size == 8 or $size == 16 or $size == 32 or $size == 64;
      0        
      0        
76             }
77              
78             sub requireN($$) # Check that we have a string of N bits
79 110     110 0 189 {my ($size, $xmm) = @_; # Size in bits, bits
80 110 50       223 defined($xmm) or confess;
81 110         154 my $l = length $xmm;
82 110 50       191 confess "$size bits required for operand ($l)" unless $l == $size;
83 110 50       468 confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z);
84             }
85              
86             sub require8($) # Check that we have a string of 8 bits
87 2     2 0 5 {my ($xmm) = @_; # Byte
88 2         5 requireN(8, $xmm);
89             }
90              
91             sub require16($) # Check that we have a string of 16 bits
92 2     2 0 4 {my ($xmm) = @_; # Word
93 2         6 requireN(16, $xmm);
94             }
95              
96             sub require32($) # Check that we have a string of 32 bits
97 2     2 0 6 {my ($xmm) = @_; # DWord
98 2         5 requireN(32, $xmm);
99             }
100              
101             sub require64($) # Check that we have a string of 64 bits
102 62     62 0 107 {my ($xmm) = @_; # Bytes
103 62         121 requireN(64, $xmm);
104             }
105              
106             sub require128($) # Check that we have a string of 128 bits
107 42     42 0 65 {my ($xmm) = @_; # String of 128 bits
108 42         84 requireN(128, $xmm);
109             }
110              
111             sub require256($) # Check that we have a string of 256 bits
112 0     0 0 0 {my ($xmm) = @_; # String of 256 bits
113 0         0 requireN(256, $xmm);
114             }
115              
116             sub require512($) # Check that we have a string of 512 bits
117 0     0 0 0 {my ($xmm) = @_; # String of bits to check
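118 0         0 requireN(128, $xmm); # NOTE(review): checks for 128 bits although this sub is documented as requiring 512 bits - confirm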
119             }
120              
121             sub require128or256or512($;$) # Check that we have a string of 128|256|512 bits in the first operand and optionally the same in the second operand
122 108     108 0 191 {my ($xmm1, $xmm2) = @_; # Bytes, optional bytes
123 108         152 my $l = length $xmm1;
124 108 50 100     432 confess "128|256|512 bits required for first operand ($l)" unless $l == 128 or $l == 256 or $l == 512;
      66        
125 108 100       218 if (defined $xmm2)
126 96         132 {my $m = length $xmm2;
127 96 50 100     302 confess "128|256|512 bits required for second operand ($m)" unless $m == 128 or $m == 256 or $m == 512;
      66        
128 96 50       183 confess "Operands must have same length($l,$m)" unless $l == $m;
129             }
130             $l
131 108         159 }
132              
133             sub require64or128or256or512($) # Check that we have a string of 64|128|256|512 bits
134 3     3 0 9 {my ($xmm) = @_; # Bytes
135 3         8 my $l = length $xmm;
136 3 0 33     10 confess "64|128|256|512 bits required for operand" unless $l == 64 or $l == 128 or $l == 256 or $l == 512;
      33        
      0        
137 3 50       16 confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z);
138             }
139              
140             sub requireSameLength($$) # Check that the two operands have the same length
141 448     448 0 687 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
142 448         671 my ($l, $L) = (length($xmm1), length($xmm2));
143 448 50       772 confess "Operands have different lengths($l, $L)" unless $l == $L;
144 448         675 $l
145             }
146              
147             sub requireNumber128or256or512($) # Check that we have a number with a value of 128|256|512
148 4     4 0 8 {my ($n) = @_; # Number
149 4 50 33     28 confess "128|256|512 required for operand" unless $n == 128 or $n == 256 or $n == 512;
      33        
150             }
151              
152             sub flipBitsUnderMask($$) # Flip the bits in a string where the corresponding mask bit is 1 else leave the bit as is
153 29     29 0 67 {my ($string, $mask) = @_; # Bit string, mask
154 29         61 my $l = requireSameLength $string, $mask;
155 29         47 my $f = '';
156 29         68 for my $i(0..$l-1) # Each character in the string and mask
157 1636         2101 {my $s = substr($string, $i, 1);
158 1636 100       2799 $f .= substr($mask, $i, 1) eq '0' ? $s : $s eq '0' ? '1' : '0'
    100          
159             }
160             $f
161 29         162 }
162              
163             sub compareTwosComplement($$) # Compare two numbers in two's complement formats and return -1 if the first number is less than the second, 0 if they are equal, else +1
164 103     103 0 250 {my ($a, $b) = @_; # First, second
165 103         182 my $n = requireSameLength $a, $b;
166              
167 103 100 100     329 return -1 if substr($a, 0, 1) eq '1' and substr($b, 0, 1) eq '0'; # Leading sign bit
168 88 100 100     286 return +1 if substr($a, 0, 1) eq '0' and substr($b, 0, 1) eq '1';
169              
170 81         132 for(1..$n) # Non sign bits
171 701 100 100     1777 {return -1 if substr($a, $_, 1) eq '0' and substr($b, $_, 1) eq '1';
172 691 100 100     1511 return +1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '0';
173             }
174             0 # Equal
175 53         175 }
176              
177             #D1 Instructions # Emulation of Avx512 instructions
178              
179             sub PSLLDQ($$) # Packed Shift Left Logical DoubleQword
180 8     8 1 21 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
181 8         21 require128 $xmm1; # Check that we have a string of 128 bits
182 8         22 substr($xmm1, $imm8 * 8).zBytes($imm8)
183             }
184              
185             sub VPSLLDQ($$) # Packed Shift Left Logical DoubleQword
186 3     3 1 8 {my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
187 3         9 require128or256or512 $xmm1; # Check that we have a string of 128, 256 or 512 bits
188 3 50 33     12 confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16;
189              
190 3 100       10 return PSLLDQ($xmm1, $imm8) if length($xmm1) == 128;
191              
192 2 100       13 return PSLLDQ(substr($xmm1, 0, 128), $imm8).
193             PSLLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
194              
195 1         12 return PSLLDQ(substr($xmm1, 0, 128), $imm8).
196             PSLLDQ(substr($xmm1, 128, 128), $imm8).
197             PSLLDQ(substr($xmm1, 256, 128), $imm8).
198             PSLLDQ(substr($xmm1, 384, 128), $imm8)
199             }
200              
201             sub PSRLDQ($$) # Packed Shift Right Logical DoubleQword
202 18     18 1 47 {my ($xmm1, $imm8) = @_; # Bytes, length of shift
203 18         43 require128 $xmm1; # Check that we have a string of 128 bits
204 18         34 zBytes($imm8).substr($xmm1, 0, 128 - $imm8 * 8)
205             }
206              
207             sub VPSRLDQ($$) # Packed Shift Right Logical DoubleQword
208 3     3 1 9 {my ($xmm1, $imm8) = @_; # Bytes, length of shift
209 3         11 require128or256or512 $xmm1; # Check that we have a string of 128, 256 or 512 bits
210 3 50 33     17 confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16;
211              
212 3 100       11 return PSRLDQ($xmm1, $imm8) if length($xmm1) == 128;
213              
214 2 100       15 return PSRLDQ(substr($xmm1, 0, 128), $imm8).
215             PSRLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
216              
217 1         4 return PSRLDQ(substr($xmm1, 0, 128), $imm8).
218             PSRLDQ(substr($xmm1, 128, 128), $imm8).
219             PSRLDQ(substr($xmm1, 256, 128), $imm8).
220             PSRLDQ(substr($xmm1, 384, 128), $imm8)
221             }
222              
223             #D1 PCMP # Packed CoMPare
224             #D2 PCMPEQ # Packed CoMPare EQual
225              
226             sub pcmpeq($$$) #P Packed CoMPare EQual
227 4     4 1 12 {my ($size, $xmm1, $xmm2) = @_; # Size in bits, element, element
228              
229 4 50       10 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
230 4         14 require128 $xmm1; # Check that we have a string of 128 bits in the first operand
231 4         10 require128 $xmm2; # Check that we have a string of 128 bits in the second operand
232 4         12 requireSameLength $xmm1, $xmm2; # Check operands have the same length
233              
234 4         9 my $N = 128 / $size; # Number of elements in operation
235 4         13 my $clear = '0' x $size;
236 4         9 my $set = '1' x $size;
237 4         8 my $xmm3 = zBytes $N;
238 4         18 for(0..$N-1)
239 30         42 {my $o = $_ * $size;
240 30 100       71 substr($xmm3, $o, $size) =
241             substr($xmm1, $o, $size) eq
242             substr($xmm2, $o, $size) ? $set : $clear;
243             }
244             $xmm3
245 4         16 }
246              
247             sub PCMPEQB($$) # Packed CoMPare EQual Byte
248 1     1 1 4 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
249 1         5 pcmpeq 8, $xmm1, $xmm2;
250             }
251              
252             sub PCMPEQW($$) # Packed CoMPare EQual Word
253 1     1 1 5 {my ($xmm1, $xmm2) = @_; # Words, words
254 1         4 pcmpeq 16, $xmm1, $xmm2;
255             }
256              
257             sub PCMPEQD($$) # Packed CoMPare EQual DWord
258 1     1 1 5 {my ($xmm1, $xmm2) = @_; # DWords, DWords
259 1         4 pcmpeq 32, $xmm1, $xmm2;
260             }
261              
262             sub PCMPEQQ($$) # Packed CoMPare EQual QWord
263 1     1 1 4 {my ($xmm1, $xmm2) = @_; # QWords, QWords
264 1         5 pcmpeq 64, $xmm1, $xmm2;
265             }
266              
267             #D2 PCMPGT # Packed CoMPare Greater Than
268              
269             sub pcmpgt($$$) #P Packed CoMPare Greater Than
270 4     4 1 9 {my ($size, $xmm1, $xmm2) = @_; # Size in bits, element, element
271              
272 4 50       11 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
273 4         13 require128 $xmm1; # Check that we have a string of 128 bits in the first operand
274 4         11 require128 $xmm2; # Check that we have a string of 128 bits in the second operand
275 4         12 requireSameLength $xmm1, $xmm2; # Check operands have the same length
276              
277 4         10 my $N = 128 / $size; # Number of elements in operation
278 4         10 my $clear = '0' x $size;
279 4         9 my $set = '1' x $size;
280 4         8 my $xmm3 = zBytes $N;
281 4         20 for(0..$N-1)
282 30         49 {my $o = $_ * $size;
283 30 100       66 substr($xmm3, $o, $size) = +1 == compareTwosComplement( # Signed compare
284             substr($xmm1, $o, $size),
285             substr($xmm2, $o, $size)) ? $set : $clear;
286             }
287             $xmm3
288 4         18 }
289              
290             sub PCMPGTB($$) # Packed CoMPare Greater Than Byte
291 1     1 1 6 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
292 1         4 pcmpgt 8, $xmm1, $xmm2;
293             }
294              
295             sub PCMPGTW($$) # Packed CoMPare Greater Than Word
296 1     1 1 3 {my ($xmm1, $xmm2) = @_; # Words, words
297 1         4 pcmpgt 16, $xmm1, $xmm2;
298             }
299              
300             sub PCMPGTD($$) # Packed CoMPare Greater Than DWord
301 1     1 1 4 {my ($xmm1, $xmm2) = @_; # DWords, DWords
302 1         3 pcmpgt 32, $xmm1, $xmm2;
303             }
304              
305             sub PCMPGTQ($$) # Packed CoMPare Greater Than QWord
306 1     1 1 8 {my ($xmm1, $xmm2) = @_; # QWords, QWords
307 1         3 pcmpgt 64, $xmm1, $xmm2;
308             }
309              
310             #D1 VPCMP # Packed CoMPare
311             #D2 VPCMPEQ # Packed CoMPare EQual
312              
313             sub vpcmpeq($$$;$) #P Packed CoMPare EQual Byte|word|double|quad with optional masking
314 5     5 1 13 {my ($size, $k2, $xmm1, $xmm2) = @_; # Size in bits: 8|16|32|64 of each element, optional input mask, bytes, bytes
315              
316 5 50       12 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
317 5 100       18 require64or128or256or512 $k2 if defined $k2; # Optional mask
318 5         15 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of length 128, 256 or 512 and that they have the same length
319              
320 5         11 my $N = length($xmm1) / $size; # Bytes|Words|Doubles|Quads in operation
321 5 100       12 if (defined $k2) # Masked operation
322 3         12 {my $k1 = maskRegister; # Result register
323 3 100       12 $k2 = substr($k2, 48) if $N == 16; # Relevant portion of register
324 3 100       8 $k2 = substr($k2, 32) if $N == 32;
325 3         10 for(0..$N-1)
326 112 100       193 {next unless substr($k2, $_, 1) eq '1';
327 16         24 my $o = $_ * $size;
328 16 100       39 substr($k1, $_, 1) = substr($xmm1, $o, $size) eq
329             substr($xmm2, $o, $size) ? '1' : '0';
330             }
331 3 100       10 return zBytes(6).substr($k1, 0, 16) if $N == 16;
332 2 100       7 return zBytes(4).substr($k1, 0, 32) if $N == 32;
333 1         6 return $k1
334             }
335              
336 2         5 my $xmm3 = zBytes $N; # Non masked operation
337 2         10 my $clear = '0' x $size;
338 2         34 my $set = '1' x $size;
339 2         9 for(0..$N-1)
340 48         54 {my $o = $_ * $size;
341 48 100       102 substr($xmm3, $o, $size) = substr($xmm1, $o, $size) eq
342             substr($xmm2, $o, $size) ? $set : $clear
343             }
344             $xmm3
345 2         12 }
346              
347             sub VPCMPEQB($$;$) # Packed CoMPare EQual Byte with optional masking
348 5 100   5 1 23 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, bytes, bytes
349 5         14 vpcmpeq(8, $k2, $xmm1, $xmm2)
350             }
351              
352             sub VPCMPEQW($$;$) # Packed CoMPare EQual Word with optional masking
353 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, words, words
354 0         0 vpcmpeq(16, $k2, $xmm1, $xmm2)
355             }
356              
357             sub VPCMPEQD($$;$) # Packed CoMPare EQual DWord with optional masking
358 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, dwords, dwords
359 0         0 vpcmpeq(32, $k2, $xmm1, $xmm2)
360             }
361              
362             sub VPCMPEQQ($$;$) # Packed CoMPare EQual QWord with optional masking
363 0 0   0 1 0 {my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask, qwords, qwords
364 0         0 vpcmpeq(64, $k2, $xmm1, $xmm2)
365             }
366              
367             #D2 VPCMP # Packed CoMPare
368              
369             sub vpcmp($$$$$) #P Packed CoMPare
370 10     10 1 24 {my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code
371              
372 10 50       25 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
373 10         26 require64 $k2; # Mask
374 10         33 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of length 128, 256 or 512 and that they have the same length
375 10 50       45 confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code
376              
377             my $T = # String tests
378 16 100   16   26 [sub {return 1 if compareTwosComplement($_[0], $_[1]) == 0; 0}, # eq 0
  6         21  
379 8 100   8   14 sub {return 1 if compareTwosComplement($_[0], $_[1]) == -1; 0}, # lt 1
  5         16  
380 8 100   8   14 sub {return 1 if compareTwosComplement($_[0], $_[1]) != +1; 0}, # le 2
  1         4  
381             undef,
382 16 100   16   27 sub {return 1 if compareTwosComplement($_[0], $_[1]) != 0; 0}, # ne 4
  10         27  
383 8 100   8   13 sub {return 1 if compareTwosComplement($_[0], $_[1]) != -1; 0}, # ge 5
  3         9  
384 8 100   8   13 sub {return 1 if compareTwosComplement($_[0], $_[1]) == +1; 0}, # gt 6
  7         17  
385 10         115 ];
386              
387 10         32 my $N = length($xmm1) / $size; # Number of elements
388 10         21 my $k1 = maskRegister;
389 10         30 $k2 = substr($k2, -$N); # Relevant portion of mask
390 10         29 for(0..$N-1)
391 480 100       838 {next unless substr($k2, $_, 1) eq '1'; # Mask
392 64         87 my $o = $_ * $size;
393 64 100       121 substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code
  64         115  
394             substr($xmm2, $o, $size)) ? '1' : '0';
395             }
396              
397 10         22 substr(zBytes(8).substr($k1, 0, $N), -64)
398             }
399              
400             sub VPCMPB($$$$) # Packed CoMPare Byte
401 10     10 1 28 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code
402 10         24 vpcmp 8, $k2, $xmm1, $xmm2, $op
403             }
404              
405             sub VPCMPW($$$$) # Packed CoMPare Word
406 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code
407 0         0 vpcmp 16, $k2, $xmm1, $xmm2, $op
408             }
409              
410             sub VPCMPD($$$$) # Packed CoMPare Dword
411 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code
412 0         0 vpcmp 32, $k2, $xmm1, $xmm2, $op
413             }
414              
415             sub VPCMPQ($$$$) # Packed CoMPare Qword
416 0     0 1 0 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code
417 0         0 vpcmp 64, $k2, $xmm1, $xmm2, $op
418             }
419              
420             #D2 VPCMPU # Packed CoMPare Unsigned
421              
422             sub vpcmpu($$$$$) #P Packed CoMPare Unsigned
423 46     46 1 106 {my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code
424              
425 46 50       114 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
426 46         117 require64 $k2; # Mask
427 46         167 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of length 128, 256 or 512 and that they have the same length
428 46 50       191 confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code
429              
430             my $T = # String tests
431 50 100   50   181 [sub {return 1 if $_[0] eq $_[1]; 0}, # eq 0
  32         84  
432 42 100   42   120 sub {return 1 if $_[0] lt $_[1]; 0}, # lt 1
  29         96  
433 42 100   42   134 sub {return 1 if $_[0] le $_[1]; 0}, # le 2
  17         44  
434             undef,
435 50 100   50   163 sub {return 1 if $_[0] ne $_[1]; 0}, # ne 4
  18         50  
436 42 100   42   142 sub {return 1 if $_[0] ge $_[1]; 0}, # ge 5
  13         36  
437 42 100   42   122 sub {return 1 if $_[0] gt $_[1]; 0}, # gt 6
  25         66  
438 46         359 ];
439              
440 46         110 my $N = length($xmm1) / $size; # Number of elements
441 46         94 my $k1 = maskRegister;
442 46         132 $k2 = substr($k2, -$N); # Relevant portion of mask
443 46         155 for(0..$N-1)
444 1152 100       2084 {next unless substr($k2, $_, 1) eq '1'; # Mask
445 268         347 my $o = $_ * $size;
446 268 100       472 substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code
  268         447  
447             substr($xmm2, $o, $size)) ? '1' : '0';
448             }
449              
450 46         85 substr(zBytes(8).substr($k1, 0, $N), -64)
451             }
452              
453             sub VPCMPUB($$$$) # Packed CoMPare Unsigned Byte
454 10     10 1 28 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code
455 10         24 vpcmpu 8, $k2, $xmm1, $xmm2, $op
456             }
457              
458             sub VPCMPUW($$$$) # Packed CoMPare Unsigned Word
459 12     12 1 32 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code
460 12         34 vpcmpu 16, $k2, $xmm1, $xmm2, $op
461             }
462              
463             sub VPCMPUD($$$$) # Packed CoMPare Unsigned Dword
464 12     12 1 34 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code
465 12         32 vpcmpu 32, $k2, $xmm1, $xmm2, $op
466             }
467              
468             sub VPCMPUQ($$$$) # Packed CoMPare Unsigned Qword
469 12     12 1 39 {my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code
470 12         61 vpcmpu 64, $k2, $xmm1, $xmm2, $op
471             }
472              
473             #D1 VPTEST # Packed TEST
474             #D2 VPTESTM # Packed TEST MASK
475              
476             sub andAndTest($$) #P And two bit strings of the same length and return 0 if the result is 0 else 1
477 308     308 1 731 {my ($a, $b) = @_; # Element, element
478 308         540 my $N = requireSameLength $a, $b; # Check that the two elements have the same length
479 308         570 for(0..$N-1) # Look for match
480 4113 100 100     7778 {return 1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '1';
481             }
482             0
483 248         624 }
484              
485             sub vptest($$$) #P Packed TEST
486 31     31 1 54 {my ($size, $xmm1, $xmm2) = @_; # Size of element in bits, element, element
487              
488 31 50       65 require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
489 31         77 require128or256or512 $xmm1, $xmm2; # Check that both operands are strings of length 128, 256 or 512 and that they have the same length
490              
491 31         58 my $N = length($xmm1) / $size; # Number of elements
492 31         59 my $k1 = maskRegister;
493 31         114 for(0..$N-1)
494 308         425 {my $o = $_ * $size;
495 308 100       635 substr($k1, $_, 1) = andAndTest(substr($xmm1, $o, $size), # Test two elements
496             substr($xmm2, $o, $size)) ? '1' : '0';
497             }
498              
499 31         59 substr(zBytes(8).substr($k1, 0, $N), -64)
500             }
501              
502             sub VPTESTMB($$) # Packed TEST Mask Byte
503 10     10 1 23 {my ($xmm1, $xmm2) = @_; # Bytes, bytes
504 10         21 vptest 8, $xmm1, $xmm2
505             }
506              
507             sub VPTESTMW($$) # Packed TEST Mask Word
508 10     10 1 20 {my ($xmm1, $xmm2) = @_; # Words, words
509 10         18 vptest 16, $xmm1, $xmm2
510             }
511              
512             sub VPTESTMD($$) # Packed TEST Mask Dword
513 10     10 1 21 {my ($xmm1, $xmm2) = @_; # Dwords, dwords
514 10         24 vptest 32, $xmm1, $xmm2
515             }
516              
517             sub VPTESTMQ($$) # Packed TEST Mask Quad
518 1     1 1 4 {my ($xmm1, $xmm2) = @_; # Quads, quads
519 1         4 vptest 64, $xmm1, $xmm2
520             }
521              
522             #D1 VPBROADCAST # VPBROADCASTB - Packed BROADCAST Byte
523              
524             sub VPBROADCASTB($$) # Packed BROADCAST Byte
525 1     1 1 5 {my ($size, $b) = @_; # Size of target in bits, byte
526 1         7 requireNumber128or256or512 $size;
527 1         4 require8 $b;
528 1         5 repeat($b, $size / 8)
529             }
530              
531             sub VPBROADCASTW($$) # Packed BROADCAST Word
532 1     1 1 5 {my ($size, $w) = @_; # Size of target in bits, word
533 1         5 requireNumber128or256or512 $size;
534 1         5 require16 $w;
535 1         9 repeat($w, $size / 16)
536             }
537              
538             sub VPBROADCASTD($$) # Packed BROADCAST Dword
539 1     1 1 5 {my ($size, $d) = @_; # Size of target in bits, dword
540 1         5 requireNumber128or256or512 $size;
541 1         8 require32 $d;
542 1         5 repeat($d, $size / 32)
543             }
544              
545             sub VPBROADCASTQ($$) # Packed BROADCAST Quad
546 1     1 1 5 {my ($size, $q) = @_; # Size of target in bits, quad
547 1         7 requireNumber128or256or512 $size;
548 1         4 require64 $q;
549 1         5 repeat($q, $size / 64)
550             }
551              
552             #D1 VPINSR # Packed INSeRt
553              
554             sub VPINSRB($$$) # Packed INSeRt Byte
555 1     1 1 3 {my ($target, $byte, $pos) = @_; # Target element, byte, position to insert byte expressed as number of bytes from lowest order byte numbered 0
556 1         5 require128or256or512 $target;
557 1         3 require8 $byte;
558 1 50 33     9 confess "Invalid position $pos" if $pos < 0 or $pos > length($target);
559 1         5 substr($target, -($pos+1)*8, 8) = $byte;
560 1         5 $target
561             }
562              
563             sub VPINSRW($$$) # Packed INSeRt Word
564 1     1 1 5 {my ($target, $word, $pos) = @_; # Target element, word, position to insert word expressed as number of words from lowest order word numbered 0
565 1         4 require128or256or512 $target;
566 1         4 require16 $word;
567 1 50 33     12 confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 2;
568 1         5 substr($target, -($pos+1)*16, 16) = $word;
569 1         5 $target
570             }
571              
572             sub VPINSRD($$$) # Packed INSeRt Dword
573 1     1 1 5 {my ($target, $dword, $pos) = @_; # Target element, dword, position to insert dword expressed as number of dwords from lowest order dword numbered 0
574 1         4 require128or256or512 $target;
575 1         3 require32 $dword;
576 1 50 33     10 confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 4;
577 1         4 substr($target, -($pos+1)*32, 32) = $dword;
578 1         5 $target
579             }
580              
581             sub VPINSRQ($$$) # Packed INSeRt Quad
582 1     1 1 4 {my ($target, $qword, $pos) = @_; # Target element, qword, position to insert qword expressed as number of qwords from lowest order qword numbered 0
583 1         4 require128or256or512 $target;
584 1         3 require64 $qword;
585 1 50 33     9 confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 8;
586 1         5 substr($target, -($pos+1)*64, 64) = $qword;
587 1         4 $target
588             }
589              
590             #D1 VPLZCNT # Packed Leading Zero CouNT
591              
592             sub VPLZCNTD($) # Packed Leading Zero CouNT Dword
593 1     1 1 3 {my ($target) = @_; # Target element
594 1         4 require128or256or512 $target;
595 1         2 my $r = '';
596 1         4 my $n = length($target) / 32;
597 1         5 for(0..$n-1)
598 16         57 {my $b = substr($target, $_*32, 32) =~ s(1.*\Z) ()sr;
599 16         50 $r .= sprintf("%032b", length $b);
600             }
601             $r
602 1         6 }
603              
604             sub VPLZCNTQ($) # Packed Leading Zero CouNT Qword
605 1     1 1 4 {my ($target) = @_; # Target element
606 1         4 require128or256or512 $target;
607 1         2 my $r = '';
608 1         4 my $n = length($target) / 64;
609 1         5 for(0..$n-1)
610 8         31 {my $b = substr($target, $_*64, 64) =~ s(1.*\Z) ()sr;
611 8         28 $r .= sprintf("%064b", length $b);
612             }
613             $r
614 1         4 }
615              
616             #D1 Compress and Expand # Compress or expand
617             #D2 VPCOMPRESS # Packed COMPRESS
618              
619             sub vpcompress($$$$$) #P Packed COMPRESS
620 2     2 1 7 {my ($size, $xmm1, $k2, $z, $xmm2) = @_; # Size of each element in bits, Compression target, compression mask, clear upper elements, source to compress
621 2         5 require64 $k2;
622 2         7 my $n = require128or256or512 $xmm1, $xmm2;
623 2         6 my $N = $n / $size; # Number of elements
624 2 50       6 $xmm1 = '0' x length $xmm1 if $z; # Clear target if requested
625 2         3 my $p = 0; # Position in target
626 2         6 for(1..$N) # Compress selected elements
627 6 100       17 {if (substr($k2, -$_, 1) eq '1')
628 3         11 {substr($xmm1, --$p * $size, $size) = substr($xmm2, -$_ * $size, $size)
629             }
630             }
631             $xmm1
632 2         9 }
633              
sub VPCOMPRESSD($$$$)                                                           # Packed COMPRESS Dword
 {my ($xmm1, $k2, $z, $xmm2) = @_;                                              # Compression target, compression mask, clear upper elements, source to compress
  my $dwordBits = 32;                                                           # Element width in bits for the dword form
  return vpcompress($dwordBits, $xmm1, $k2, $z, $xmm2);                         # All the work is done by the generic compress routine
 }
638              
sub VPCOMPRESSQ($$$$)                                                           # Packed COMPRESS Qword
 {my ($xmm1, $k2, $z, $xmm2) = @_;                                              # Compression target, compression mask, clear upper elements, source to compress
  my $qwordBits = 64;                                                           # Element width in bits for the qword form
  return vpcompress($qwordBits, $xmm1, $k2, $z, $xmm2);                         # All the work is done by the generic compress routine
 }
643              
644             #D2 VPEXPAND # Packed EXPAND
645              
sub vpexpand($$$$$)                                                             #P Packed EXPAND
 {my ($size, $xmm1, $k2, $z, $xmm2) = @_;                                       # Size of each element in bits, Expansion target, expansion mask, clear unselected target elements, source to expand
  require64($k2);                                                               # Mask check delegated to a helper defined elsewhere in this file
  my $n = require128or256or512($xmm1, $xmm2);                                   # Helper result, divided by the element size below to get the element count
  my $N = $n / $size;                                                           # Number of elements
  $xmm1 = '0' x length $xmm1 if $z;                                             # Zero the target first so that unselected elements are returned clear
  my $p = 0;                                                                    # Negative index of the last source element consumed, counted from the right
  for my $e (1..$N)                                                             # Expand into the selected target elements, starting from the right
   {if (substr($k2, -$e, 1) eq '1')                                             # Mask bit for this target element
     {substr($xmm1, -$e * $size, $size) = substr($xmm2, --$p * $size, $size)    # The next contiguous source element is written to the selected target element
     }
   }
  $xmm1                                                                         # The expanded target
 }
660              
sub VPEXPANDD($$$$)                                                             # Packed EXPAND Dword
 {my ($xmm1, $k2, $z, $xmm2) = @_;                                              # Expansion target, expansion mask, clear upper elements, source to expand
  my $dwordBits = 32;                                                           # Element width in bits for the dword form
  return vpexpand($dwordBits, $xmm1, $k2, $z, $xmm2);                           # All the work is done by the generic expand routine
 }
665              
sub VPEXPANDQ($$$$)                                                             # Packed EXPAND Qword
 {my ($xmm1, $k2, $z, $xmm2) = @_;                                              # Expansion target, expansion mask, clear upper elements, source to expand
  my $qwordBits = 64;                                                           # Element width in bits for the qword form
  return vpexpand($qwordBits, $xmm1, $k2, $z, $xmm2);                           # All the work is done by the generic expand routine
 }
670              
671             #D0
672             #-------------------------------------------------------------------------------
673             # Export
674             #-------------------------------------------------------------------------------
675              
use Exporter qw(import);                                                        # Borrow Exporter's import method for this package

our (@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);                                  # Package variables consulted by Exporter, declared with our rather than the obsolete vars pragma

@ISA          = qw(Exporter);
@EXPORT_OK    = qw(
);                                                                              # NOTE(review): the list is empty here, so nothing can be imported by name - confirm this is intended
%EXPORT_TAGS  = (all=>[@EXPORT, @EXPORT_OK]);                                   # The :all tag covers everything in @EXPORT and @EXPORT_OK
684              
685             # podDocumentation
686              
687             =pod
688              
689             =encoding utf-8
690              
691             =head1 Name
692              
693             Simd::Avx512 - Emulate SIMD Avx512 instructions
694              
695             =head1 Synopsis
696              
697             Help needed please!
698              
699             The instructions being emulated are illustrated at: L
700             The instructions being emulated are described at: L
701              
702             =head2 Example
703              
704             Find the number of leading zeros in each of the 16 double words and each of the 8 quad words of a 512 bit string.
705              
706             if (1) {
707             my ($i, $od, $oq) = (
708             #Q0 1 2 3 4 5 6 7 8
709             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
710             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
711             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
712             #b
713            
714            
715            
716             );
717              
718             is_deeply VPLZCNTD($i), $od;
719             is_deeply VPLZCNTQ($i), $oq;
720             }
721              
722             =head1 Description
723              
724             Emulate SIMD Avx512 instructions
725              
726              
727             Version 20210130.
728              
729              
730             The following sections describe the methods in each functional area of this
731             module. For an alphabetic listing of all methods by name see L<Index|/Index>.
732              
733              
734              
735             =head1 Instructions
736              
737             Emulation of Avx512 instructions
738              
739             =head2 PSLLDQ($xmm1, $imm8)
740              
741             Packed Shift Left Logical DoubleQword
742              
743             Parameter Description
744             1 $xmm1 Bytes
745             2 $imm8 Length of shift in bytes
746              
747             B<Example:>
748              
749              
750              
751             is_deeply PSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
752              
753             #Q0 1 2 3 4 5 6 7 8
754             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
755             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
756             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
757             #b
758             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
759             ,2),
760             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
761              
762              
763             =head2 VPSLLDQ($xmm1, $imm8)
764              
765             Packed Shift Left Logical DoubleQword
766              
767             Parameter Description
768             1 $xmm1 Bytes
769             2 $imm8 Length of shift in bytes
770              
771             B<Example:>
772              
773              
774              
775             is_deeply VPSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
776              
777             #Q0 1 2 3 4 5 6 7 8
778             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
779             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
780             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
781             #b
782             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
783             ,2),
784             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
785              
786              
787             is_deeply VPSLLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
788              
789             #Q0 1 2 3 4 5 6 7 8
790             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
791             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
792             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
793             #b
794             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
795             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
796             ,2),
797             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
798             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
799              
800              
801             is_deeply VPSLLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
802              
803             #Q0 1 2 3 4 5 6 7 8
804             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
805             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
806             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
807             #b
808             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
809             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
810             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
811             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
812             ,2),
813             '00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
814             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
815             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
816             .'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000'
817              
818              
819             =head2 PSRLDQ($xmm1, $imm8)
820              
821             Packed Shift Right Logical DoubleQword
822              
823             Parameter Description
824             1 $xmm1 Bytes
825             2 $imm8 Length of shift in bytes
826              
827             B<Example:>
828              
829              
830              
831             is_deeply PSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
832              
833             #Q0 1 2 3 4 5 6 7 8
834             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
835             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
836             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
837             #b
838             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
839             ,2),
840             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
841              
842              
843             =head2 VPSRLDQ($xmm1, $imm8)
844              
845             Packed Shift Right Logical DoubleQword
846              
847             Parameter Description
848             1 $xmm1 Bytes
849             2 $imm8 Length of shift in bytes
850              
851             B<Example:>
852              
853              
854              
855             is_deeply VPSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
856              
857             #Q0 1 2 3 4 5 6 7 8
858             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
859             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
860             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
861             #b
862             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
863             ,2),
864             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
865              
866              
867             is_deeply VPSRLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
868              
869             #Q0 1 2 3 4 5 6 7 8
870             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
871             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
872             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
873             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
874             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
875             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
876             ,2),
877             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
878             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
879              
880              
881             is_deeply VPSRLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
882              
883             #Q0 1 2 3 4 5 6 7 8
884             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
885             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
886             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
887             #b
888             '11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
889             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
890             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
891             .'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111'
892             ,2),
893             '00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
894             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
895             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
896             .'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000'
897              
898              
899             =head1 PCMP
900              
901             Packed CoMPare
902              
903             =head2 PCMPEQ
904              
905             Packed CoMPare EQual
906              
907             =head3 PCMPEQB($xmm1, $xmm2)
908              
909             Packed CoMPare EQual Byte
910              
911             Parameter Description
912             1 $xmm1 Bytes
913             2 $xmm2 Bytes
914              
915             B<Example:>
916              
917              
918              
919             is_deeply PCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
920              
921             #Q0 1 2 3 4 5 6 7 8
922             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
923             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
924             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
925             #b
926             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
927             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
928             ),
929             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
930              
931              
932             =head3 PCMPEQW($xmm1, $xmm2)
933              
934             Packed CoMPare EQual Word
935              
936             Parameter Description
937             1 $xmm1 Words
938             2 $xmm2 Words
939              
940             B<Example:>
941              
942              
943              
944             is_deeply PCMPEQW( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
945              
946             #Q0 1 2 3 4 5 6 7 8
947             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
948             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
949             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
950             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
951             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
952             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
953             ),
954             '00000000000000001111111111111111000000000000000000000000000000001111111111111111111111111111111111111111111111110000000000000000'
955              
956              
957             =head3 PCMPEQD($xmm1, $xmm2)
958              
959             Packed CoMPare EQual DWord
960              
961             Parameter Description
962             1 $xmm1 DWords
963             2 $xmm2 DWords
964              
965             B<Example:>
966              
967              
968              
969             is_deeply PCMPEQD( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
970              
971             #Q0 1 2 3 4 5 6 7 8
972             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
973             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
974             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
975             #b
976             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
977             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
978             ),
979             '00000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111100000000000000000000000000000000'
980              
981              
982             =head3 PCMPEQQ($xmm1, $xmm2)
983              
984             Packed CoMPare EQual QWord
985              
986             Parameter Description
987             1 $xmm1 QWords
988             2 $xmm2 QWords
989              
990             B<Example:>
991              
992              
993              
994             is_deeply PCMPEQQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
995              
996             #Q0 1 2 3 4 5 6 7 8
997             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
998             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
999             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1000             #b
1001             '11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011110'
1002             ,'11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011010'
1003             ),
1004             '11111111111111111111111111111111111111111111111111111111111111110000000000000000000000000000000000000000000000000000000000000000'
1005              
1006              
1007             =head2 PCMPGT
1008              
1009             Packed CoMPare Greater Than
1010              
1011             =head3 PCMPGTB($xmm1, $xmm2)
1012              
1013             Packed CoMPare Greater Than Byte
1014              
1015             Parameter Description
1016             1 $xmm1 Bytes
1017             2 $xmm2 Bytes
1018              
1019             B<Example:>
1020              
1021              
1022              
1023             is_deeply PCMPGTB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1024              
1025             #Q0 1 2 3 4 5 6 7 8
1026             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1027             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1028             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1029             #b
1030             '11100001000010000000000011000000010000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1031             ,'11100001000001000000000001000000100000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
1032             ),
1033             '00000000111111110000000000000000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000011111111'
1034              
1035              
1036             =head3 PCMPGTW($xmm1, $xmm2)
1037              
1038             Packed CoMPare Greater Than Word
1039              
1040             Parameter Description
1041             1 $xmm1 Words
1042             2 $xmm2 Words
1043              
1044             B<Example:>
1045              
1046              
1047              
1048             is_deeply PCMPGTW( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1049              
1050             #Q0 1 2 3 4 5 6 7 8
1051             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1052             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1053             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1054             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
1055             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1056             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000000111110000000000000000000000000000000011010'
1057             ),
1058             '11111111111111110000000000000000111111111111111100000000000000000000000000000000111111111111111100000000000000001111111111111111'
1059              
1060              
1061             =head3 PCMPGTD($xmm1, $xmm2)
1062              
1063             Packed CoMPare Greater Than DWord
1064              
1065             Parameter Description
1066             1 $xmm1 DWords
1067             2 $xmm2 DWords
1068              
1069             B<Example:>
1070              
1071              
1072              
1073             is_deeply PCMPGTD( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1074              
1075             #Q0 1 2 3 4 5 6 7 8
1076             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1077             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1078             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1079             #b
1080             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1081             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
1082             ),
1083             '11111111111111111111111111111111111111111111111111111111111111110000000000000000000000000000000011111111111111111111111111111111'
1084              
1085              
1086             =head3 PCMPGTQ($xmm1, $xmm2)
1087              
1088             Packed CoMPare Greater Than QWord
1089              
1090             Parameter Description
1091             1 $xmm1 QWords
1092             2 $xmm2 QWords
1093              
1094             B<Example:>
1095              
1096              
1097              
1098             is_deeply PCMPGTQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1099              
1100             #Q0 1 2 3 4 5 6 7 8
1101             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1102             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1103             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1104             #b
1105             '11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011110'
1106             ,'11100001000011000000000011000000000000011000000000110000000000001100000000000000000001111110000000000000000000000000000000011010'
1107             ),
1108             '00000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111'
1109              
1110              
1111             =head1 VPCMP
1112              
1113             Packed CoMPare
1114              
1115             =head2 VPCMPEQ
1116              
1117             Packed CoMPare EQual
1118              
1119             =head3 VPCMPEQB($k2, $xmm1, $xmm2)
1120              
1121             Packed CoMPare EQual Byte with optional masking
1122              
1123             Parameter Description
1124             1 $k2 Optional input mask
1125             2 $xmm1 Bytes
1126             3 $xmm2 Bytes
1127              
1128             B<Example:>
1129              
1130              
1131              
1132             is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1133              
1134             #Q0 1 2 3 4 5 6 7 8
1135             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1136             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1137             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1138             #b
1139             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1140             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
1141             ),
1142             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
1143              
1144              
1145             is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1146              
1147             #Q0 1 2 3 4 5 6 7 8
1148             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1149             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1150             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1151             #b
1152             '11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1153             .'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110'
1154             ,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
1155             .'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010'
1156             ),
1157             '11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
1158             .'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000'
1159              
1160              
1161             is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1162              
1163             #Q0 1 2 3 4 5 6 7 8
1164             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1165             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1166             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1167             #b
1168             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1169             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1170             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1171             ),
1172             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
1173              
1174              
1175             is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1176              
1177             #Q0 1 2 3 4 5 6 7 8
1178             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1179             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1180             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1181             #b
1182             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1183             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1184             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1185             ),
1186             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
1187              
1188              
1189             is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1190              
1191             #Q0 1 2 3 4 5 6 7 8
1192             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1193             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1194             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1195             #b
1196             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1197            
1198            
1199             ),
1200             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1';
1201              
1202              
1203             =head3 VPCMPEQW($k2, $xmm1, $xmm2)
1204              
1205             Packed CoMPare EQual Word with optional masking
1206              
1207             Parameter Description
1208             1 $k2 Optional input mask
1209             2 $xmm1 Words
1210             3 $xmm2 Words
1211              
1212             =head3 VPCMPEQD($k2, $xmm1, $xmm2)
1213              
1214             Packed CoMPare EQual Dword with optional masking
1215              
1216             Parameter Description
1217             1 $k2 Optional input mask
1218             2 $xmm1 Dwords
1219             3 $xmm2 Dwords
1220              
1221             =head3 VPCMPEQQ($k2, $xmm1, $xmm2)
1222              
1223             Packed CoMPare EQual Qword with optional masking
1224              
1225             Parameter Description
1226             1 $k2 Optional input mask
1227             2 $xmm1 Qwords
1228             3 $xmm2 Qwords
1229              
1230             =head2 VPCMP
1231              
1232             Packed CoMPare
1233              
1234             =head3 VPCMPB($k2, $xmm1, $xmm2, $op)
1235              
1236             Packed CoMPare Byte
1237              
1238             Parameter Description
1239             1 $k2 Input mask
1240             2 $xmm1 Bytes
1241             3 $xmm2 Bytes
1242             4 $op Test code
1243              
1244             B<Example:>
1245              
1246              
1247             my ($mi, $mo, $o1, $o2) = ( # 128
1248             #Q0 1 2 3 4 5 6 7 8
1249             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1250             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1251             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1252             #b
1253             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1254             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1255             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1256             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1257             );
1258              
1259              
1260             is_deeply VPCMPB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1261              
1262              
1263             is_deeply VPCMPB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1264              
1265              
1266             my ($mi, $mo, $o1, $o2) = ( # 256
1267             #Q0 1 2 3 4 5 6 7 8
1268             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1269             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1270             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1271             #b
1272             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1273             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1274             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1275             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1276             );
1277              
1278             is_deeply VPCMPB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1279              
1280              
1281             is_deeply VPCMPB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1282              
1283              
1284             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1285             #Q0 1 2 3 4 5 6 7 8
1286             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1287             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1288             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1289             #b
1290             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1291             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1292             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1293             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1294            
1295            
1296             );
1297              
1298             is_deeply VPCMPB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1299              
1300              
1301             is_deeply VPCMPB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1302              
1303              
1304             is_deeply VPCMPB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1305              
1306              
1307             is_deeply VPCMPB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1308              
1309              
1310             is_deeply VPCMPB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1311              
1312              
1313             is_deeply VPCMPB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1314              
1315              
1316              
1317             =head3 VPCMPW($k2, $xmm1, $xmm2, $op)
1318              
1319             Packed CoMPare Word
1320              
1321             Parameter Description
1322             1 $k2 Input mask
1323             2 $xmm1 Words
1324             3 $xmm2 Words
1325             4 $op Test code
1326              
1327             =head3 VPCMPD($k2, $xmm1, $xmm2, $op)
1328              
1329             Packed CoMPare Dword
1330              
1331             Parameter Description
1332             1 $k2 Input mask
1333             2 $xmm1 Dwords
1334             3 $xmm2 Dwords
1335             4 $op Test code
1336              
1337             =head3 VPCMPQ($k2, $xmm1, $xmm2, $op)
1338              
1339             Packed CoMPare Qword
1340              
1341             Parameter Description
1342             1 $k2 Input mask
1343             2 $xmm1 Qwords
1344             3 $xmm2 Qwords
1345             4 $op Test code
1346              
1347             =head2 VPCMPU
1348              
1349             Packed CoMPare Unsigned
1350              
1351             =head3 VPCMPUB($k2, $xmm1, $xmm2, $op)
1352              
1353             Packed CoMPare Unsigned Byte
1354              
1355             Parameter Description
1356             1 $k2 Input mask
1357             2 $xmm1 Bytes
1358             3 $xmm2 Bytes
1359             4 $op Test code
1360              
1361             B<Example:>
1362              
1363              
1364             my ($mi, $mo, $o1, $o2) = ( # 128
1365             #Q0 1 2 3 4 5 6 7 8
1366             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1367             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1368             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1369             #b
1370             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1371             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1372             '00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1373             '10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1374             );
1375              
1376              
1377             is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1378              
1379              
1380             is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1381              
1382              
1383             my ($mi, $mo, $o1, $o2) = ( # 256
1384             #Q0 1 2 3 4 5 6 7 8
1385             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1386             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1387             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1388             #b
1389             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1390             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1391             '0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1392             '1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
1393             );
1394              
1395             is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1396              
1397              
1398             is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1399              
1400              
1401             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1402             #Q0 1 2 3 4 5 6 7 8
1403             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1404             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1405             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1406             #b
1407             '1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1408             '1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1409             '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1410             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1411            
1412            
1413             );
1414              
1415             is_deeply VPCMPUB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1416              
1417              
1418             is_deeply VPCMPUB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1419              
1420              
1421             is_deeply VPCMPUB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1422              
1423              
1424             is_deeply VPCMPUB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1425              
1426              
1427             is_deeply VPCMPUB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1428              
1429              
1430             is_deeply VPCMPUB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1431              
1432              
1433              
1434             =head3 VPCMPUW($k2, $xmm1, $xmm2, $op)
1435              
1436             Packed CoMPare Unsigned Word
1437              
1438             Parameter Description
1439             1 $k2 Input mask
1440             2 $xmm1 Words
1441             3 $xmm2 Words
1442             4 $op Test code
1443              
1444             B<Example:>
1445              
1446              
1447             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1448             #Q0 1 2 3 4 5 6 7 8
1449             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1450             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1451             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1452             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678
1453             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1454             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1455             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1456             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1457            
1458            
1459             );
1460             for my $i(\($mi, $meq, $mlt, $mgt))
1461             {$$i = zBytes(4).$$i;
1462             }
1463              
1464             is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1465              
1466              
1467             is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1468              
1469              
1470             is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1471              
1472              
1473             is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1474              
1475              
1476             is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1477              
1478              
1479             is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1480              
1481              
1482             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1483             #Q0 1 2 3 4 5 6 7 8
1484             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1485             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1486             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1487             #b
1488             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1489             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1490             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1491             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1492            
1493            
1494             );
1495             for my $i(\($mi, $meq, $mlt, $mgt))
1496             {$$i = zBytes(4).$$i;
1497             }
1498              
1499             is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1500              
1501              
1502             is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1503              
1504              
1505             is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1506              
1507              
1508             is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1509              
1510              
1511             is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1512              
1513              
1514             is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1515              
1516              
1517              
1518             =head3 VPCMPUD($k2, $xmm1, $xmm2, $op)
1519              
1520             Packed CoMPare Unsigned Dword
1521              
1522             Parameter Description
1523             1 $k2 Input mask
1524             2 $xmm1 Dwords
1525             3 $xmm2 Dwords
1526             4 $op Test code
1527              
1528             B<Example:>
1529              
1530              
1531             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1532             #Q0 1 2 3 4 5 6 7 8
1533             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1534             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1535             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1536             #b
1537             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1538             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1539             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1540             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1541            
1542            
1543             );
1544             for my $i(\($mi, $meq, $mlt, $mgt))
1545             {$$i = zBytes(6).$$i;
1546             }
1547              
1548             is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1549              
1550              
1551             is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1552              
1553              
1554             is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1555              
1556              
1557             is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1558              
1559              
1560             is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1561              
1562              
1563             is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1564              
1565              
1566             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1567             #Q0 1 2 3 4 5 6 7 8
1568             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1569             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1570             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1571             #b
1572             '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1573             '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1574             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0',
1575             '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1576            
1577            
1578             );
1579             for my $i(\($mi, $meq, $mlt, $mgt))
1580             {$$i = zBytes(6).$$i;
1581             }
1582              
1583             is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1584              
1585              
1586             is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1587              
1588              
1589             is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1590              
1591              
1592             is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1593              
1594              
1595             is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1596              
1597              
1598             is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1599              
1600              
1601              
1602             =head3 VPCMPUQ($k2, $xmm1, $xmm2, $op)
1603              
1604             Packed CoMPare Unsigned Qword
1605              
1606             Parameter Description
1607             1 $k2 Input mask
1608             2 $xmm1 Qwords
1609             3 $xmm2 Qwords
1610             4 $op Test code
1611              
1612             B<Example:>
1613              
1614              
1615             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1616             #Q0 1 2 3 4 5 6 7 8
1617             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1618             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1619             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1620             #b
1621             '1'. '1'. '1'. '1'. '1'. '0'. '0'. '1',
1622             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0',
1623             '1'. '0'. '1'. '1'. '0'. '0'. '0'. '0',
1624             '0'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1625            
1626            
1627             );
1628             for my $i(\($mi, $meq, $mlt, $mgt))
1629             {$$i = zBytes(7).$$i;
1630             }
1631              
1632             is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1633              
1634              
1635             is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1636              
1637              
1638             is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1639              
1640              
1641             is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1642              
1643              
1644             is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1645              
1646              
1647             is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1648              
1649              
1650             my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512
1651             #Q0 1 2 3 4 5 6 7 8
1652             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1653             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1654             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1655             #b
1656             '1'. '1'. '1'. '1'. '1'. '0'. '0'. '1',
1657             '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0',
1658             '1'. '0'. '1'. '1'. '0'. '0'. '0'. '0',
1659             '0'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1660            
1661            
1662             );
1663             for my $i(\($mi, $meq, $mlt, $mgt))
1664             {$$i = zBytes(7).$$i;
1665             }
1666              
1667             is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1668              
1669              
1670             is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1671              
1672              
1673             is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1674              
1675              
1676             is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1677              
1678              
1679             is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1680              
1681              
1682             is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1683              
1684              
1685              
1686             =head1 VPTEST
1687              
1688             Packed TEST
1689              
1690             =head2 VPTESTM
1691              
1692             Packed TEST MASK
1693              
1694             =head3 VPTESTMB($xmm1, $xmm2)
1695              
1696             Packed TEST Mask Byte
1697              
1698             Parameter Description
1699             1 $xmm1 Bytes
1700             2 $xmm2 Bytes
1701              
1702             B<Example:>
1703              
1704              
1705             my ($o1, $o2, $k1) = ( # 128
1706             #Q0 1 2
1707             #D0 1 2 3 4
1708             #W0 1 2 3 4 5 6 7 0
1709             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
1710             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
1711             '00000001000010000000000011000000000100000000001000010010000000000001100000000000000010100000101000011000000000111111111100010000',
1712             '10000001000010000000100011001000000001000001000000001100000001000000000000010010000101000001010000000110000111000000000000010000',
1713             '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1',
1714             );
1715              
1716             is_deeply VPTESTMB($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1717              
1718              
1719              
1720             =head3 VPTESTMW($xmm1, $xmm2)
1721              
1722             Packed TEST Mask Word
1723              
1724             Parameter Description
1725             1 $xmm1 Words
1726             2 $xmm2 Words
1727              
1728             B<Example:>
1729              
1730              
1731             my ($o1, $o2, $k1) = ( # 256
1732             #Q0 1 2 3 4 5 6 7 8
1733             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1734             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1735             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1736             #b
1737             '0000110100001000000000001100000000010000000000000001001000000000000000000000000000000000110000000000000000000000000011000001000100000001000010000000000011000000000100000000000000010010000000000000010000000000000000000000000000000000000000000000000000010000',
1738             '0000000101001000000000001100000000010000110000000001001000000000000000000000000000000011000000000000000110000000000000010001000100000001100010000000000011000000000100000010000000010010001000000000000000010000000000000100000000000001100000000000000000010000',
1739             '1'. '1'. '1'. '1'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '1',
1740             );
1741              
1742              
1743             is_deeply VPTESTMW($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1744              
1745              
1746              
1747             =head3 VPTESTMD($xmm1, $xmm2)
1748              
1749             Packed TEST Mask Dword
1750              
1751             Parameter Description
1752             1 $xmm1 Dwords
1753             2 $xmm2 Dwords
1754              
1755             B<Example:>
1756              
1757              
1758             my ($o1, $o2, $k1) = ( # 512
1759             #Q0 1 2 3 4 5 6 7 8
1760             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1761             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1762             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1763             #b
1764            
1765            
1766             '1'. '1'. '0'. '1'. '1'. '1'. '0'. '1'. '1'. '0'. '1'. '1'. '0'. '0'. '0'. '1',
1767             );
1768              
1769              
1770             is_deeply VPTESTMD($o1, $o2), zBytes(6).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1771              
1772              
1773              
1774             =head3 VPTESTMQ($xmm1, $xmm2)
1775              
1776             Packed TEST Mask Quad
1777              
1778             Parameter Description
1779             1 $xmm1 Quads
1780             2 $xmm2 Quads
1781              
1782             B<Example:>
1783              
1784              
1785             my ($o1, $o2, $k1) = ( # 512
1786             #Q0 1 2 3 4 5 6 7 8
1787             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1788             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1789             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1790             #b
1791            
1792            
1793             '1'. '0'. '0'. '0'. '1'. '0'. '0'. '1',
1794             );
1795              
1796              
1797             is_deeply VPTESTMQ($o1, $o2), zBytes(7).$k1; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1798              
1799              
1800              
1801             =head1 VPBROADCAST
1802              
1803             VPBROADCASTB - Packed BROADCAST Byte
1804              
1805             =head2 VPBROADCASTB($size, $b)
1806              
1807             Packed BROADCAST Byte
1808              
1809             Parameter Description
1810             1 $size Size of target in bits
1811             2 $b Byte
1812              
1813             B<Example:>
1814              
1815              
1816             my $b = '00010011' x 64;
1817              
1818             is_deeply VPBROADCASTB(512, '00010011'), $b; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1819              
1820             is_deeply VPBROADCASTW(512, '0001001100010011'), $b;
1821             is_deeply VPBROADCASTD(512, '00010011000100110001001100010011'), $b;
1822             is_deeply VPBROADCASTQ(512, '0001001100010011000100110001001100010011000100110001001100010011'), $b;
1823              
1824              
1825             =head2 VPBROADCASTW($size, $w)
1826              
1827             Packed BROADCAST Word
1828              
1829             Parameter Description
1830             1 $size Size of target in bits
1831             2 $w Word
1832              
1833             =head2 VPBROADCASTD($size, $d)
1834              
1835             Packed BROADCAST Dword
1836              
1837             Parameter Description
1838             1 $size Size of target in bits
1839             2 $d Dword
1840              
1841             =head2 VPBROADCASTQ($size, $q)
1842              
1843             Packed BROADCAST Quad
1844              
1845             Parameter Description
1846             1 $size Size of target in bits
1847             2 $q Quad
1848              
1849             =head1 VPINSR
1850              
1851             Packed INSeRt
1852              
1853             =head2 VPINSRB($target, $byte, $pos)
1854              
1855             Packed INSeRt Byte
1856              
1857             Parameter Description
1858             1 $target Target element
1859             2 $byte Byte
1860             3 $pos Position to insert byte expressed as number of bytes from lowest order byte numbered 0
1861              
1862             B<Example:>
1863              
1864              
1865             my ($i, $ob, $ow, $od, $oq) = ( # 512
1866             #Q0 1 2 3 4 5 6 7 8
1867             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1868             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1869             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1870             #b
1871            
1872            
1873            
1874            
1875             '11111111000000001111111100000000111111110000000011111111000000000001000000010000000100000001000000010000000100000001000000010000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000',
1876             );
1877              
1878              
1879             is_deeply VPINSRB($i, '00010000', 60), $ob; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1880              
1881             is_deeply VPINSRW($i, '0001000000010000', 30), $ow;
1882             is_deeply VPINSRD($i, '00010000000100000001000000010000', 14), $od;
1883             is_deeply VPINSRQ($i, '0001000000010000000100000001000000010000000100000001000000010000', 6), $oq;
1884              
1885              
1886             =head2 VPINSRW($target, $word, $pos)
1887              
1888             Packed INSeRt Word
1889              
1890             Parameter Description
1891             1 $target Target element
1892             2 $word Word
1893             3 $pos Position to insert word expressed as number of words from lowest order word numbered 0
1894              
1895             B<Example:>
1896              
1897              
1898             my ($i, $ob, $ow, $od, $oq) = ( # 512
1899             #Q0 1 2 3 4 5 6 7 8
1900             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1901             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1902             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1903             #b
1904             '11111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000',
1905            
1906            
1907            
1908            
1909             );
1910              
1911             is_deeply VPINSRB($i, '00010000', 60), $ob;
1912              
1913             is_deeply VPINSRW($i, '0001000000010000', 30), $ow; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1914              
1915             is_deeply VPINSRD($i, '00010000000100000001000000010000', 14), $od;
1916             is_deeply VPINSRQ($i, '0001000000010000000100000001000000010000000100000001000000010000', 6), $oq;
1917              
1918              
1919             =head2 VPINSRD($target, $dword, $pos)
1920              
1921             Packed INSeRt Dword
1922              
1923             Parameter Description
1924             1 $target Target element
1925             2 $dword Dword
1926             3 $pos Position to insert dword expressed as number of dwords from lowest order dword numbered 0
1927              
1928             B<Example:>
1929              
1930              
1931             my ($i, $ob, $ow, $od, $oq) = ( # 512
1932             #Q0 1 2 3 4 5 6 7 8
1933             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1934             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1935             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1936             #b
1937            
1938            
1939            
1940            
1941            
1942             );
1943              
1944             is_deeply VPINSRB($i, '00010000', 60), $ob;
1945             is_deeply VPINSRW($i, '0001000000010000', 30), $ow;
1946              
1947             is_deeply VPINSRD($i, '00010000000100000001000000010000', 14), $od; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1948              
1949             is_deeply VPINSRQ($i, '0001000000010000000100000001000000010000000100000001000000010000', 6), $oq;
1950              
1951              
1952             =head2 VPINSRQ($target, $qword, $pos)
1953              
1954             Packed INSeRt Quad
1955              
1956             Parameter Description
1957             1 $target Target element
1958             2 $qword Qword
1959             3 $pos Position to insert qword expressed as number of qwords from lowest order qword numbered 0
1960              
1961             B<Example:>
1962              
1963              
1964             my ($i, $ob, $ow, $od, $oq) = ( # 512
1965             #Q0 1 2 3 4 5 6 7 8
1966             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1967             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1968             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
1969             #b
1970             '11111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000111111110000000011111111000000001111111100000000',
1971            
1972            
1973            
1974            
1975             );
1976              
1977             is_deeply VPINSRB($i, '00010000', 60), $ob;
1978             is_deeply VPINSRW($i, '0001000000010000', 30), $ow;
1979             is_deeply VPINSRD($i, '00010000000100000001000000010000', 14), $od;
1980              
1981             is_deeply VPINSRQ($i, '0001000000010000000100000001000000010000000100000001000000010000', 6), $oq; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1982              
1983              
1984              
1985             =head1 VPLZCNT
1986              
1987             Packed Leading Zero CouNT
1988              
1989             =head2 VPLZCNTD($target)
1990              
1991             Packed Leading Zero CouNT Dword
1992              
1993             Parameter Description
1994             1 $target Target element
1995              
1996             B<Example:>
1997              
1998              
1999             my ($i, $od, $oq) = ( # 512
2000             #Q0 1 2 3 4 5 6 7 8
2001             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2002             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2003             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2004             #b
2005            
2006            
2007             '00000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000',
2008             );
2009              
2010              
2011             is_deeply VPLZCNTD($i), $od; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2012              
2013             is_deeply VPLZCNTQ($i), $oq;
2014              
2015              
2016             =head2 VPLZCNTQ($target)
2017              
2018             Packed Leading Zero CouNT Qword
2019              
2020             Parameter Description
2021             1 $target Target element
2022              
2023             B<Example:>
2024              
2025              
2026             my ($i, $od, $oq) = ( # 512
2027             #Q0 1 2 3 4 5 6 7 8
2028             #D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2029             #W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2030             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8
2031             #b
2032            
2033            
2034            
2035             );
2036              
2037             is_deeply VPLZCNTD($i), $od;
2038              
2039             is_deeply VPLZCNTQ($i), $oq; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2040              
2041              
2042              
2043             =head1 Compress and Expand
2044              
2045             Compress or expand
2046              
2047             =head2 VPCOMPRESS
2048              
2049             Packed COMPRESS
2050              
2051             =head3 VPCOMPRESSD($xmm1, $k2, $z, $xmm2)
2052              
2053             Packed COMPRESS Dword
2054              
2055             Parameter Description
2056             1 $xmm1 Compression target
2057             2 $k2 Compression mask
2058             3 $z Clear upper elements
2059             4 $xmm2 Source to compress
2060              
2061             B<Example:>
2062              
2063              
2064             my ($m, $i, $o, $p) = ( # 128
2065             #Q0 1 2
2066             #D0 1 2 3 4
2067             #W0 1 2 3 4 5 6 7 0
2068             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
2069             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
2070             '1'. '0'. '1'. '0',
2071             '11000000000000000000000000000000111111111111111111111111111111101000000000000000000000000000000011111111111111111111111111111111',
2072             '00000000000000000000000000000000111111111111111000000000000000001100000000000000000000000000000010000000000000000000000000000000',
2073             '11000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000',
2074             );
2075             $m = zBytes(7).'0000'.$m; # Zero pad mask
2076              
2077             is_deeply VPCOMPRESSD($o, $m, 0, $i), $o; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2078              
2079             is_deeply VPEXPANDD ($p, $m, 0, $o), $p;
2080              
2081              
2082             =head3 VPCOMPRESSQ($xmm1, $k2, $z, $xmm2)
2083              
2084             Packed COMPRESS Qword
2085              
2086             Parameter Description
2087             1 $xmm1 Compression target
2088             2 $k2 Compression mask
2089             3 $z Clear upper elements
2090             4 $xmm2 Source to compress
2091              
2092             B<Example:>
2093              
2094              
2095             my ($m, $i, $o, $p) = ( # 128
2096             #Q0 1 2
2097             #D0 1 2 3 4
2098             #W0 1 2 3 4 5 6 7 0
2099             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
2100             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
2101             '1'. '0',
2102             '10000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111',
2103             '00000000000000000000000000000000111111111111111000000000000000001000000000000000000000000000000000000000000000000000000000000000',
2104             '10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
2105             );
2106             $m = zBytes(7).'000000'.$m; # Zero pad mask
2107              
2108             is_deeply VPCOMPRESSQ($o, $m, 0, $i), $o; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2109              
2110             is_deeply VPEXPANDQ ($p, $m, 0, $o), $p;
2111              
2112              
2113             =head2 VPEXPAND
2114              
2115             Packed EXPAND
2116              
2117             =head3 VPEXPANDD($xmm1, $k2, $z, $xmm2)
2118              
2119             Packed EXPAND Dword
2120              
2121             Parameter Description
2122             1 $xmm1 Expansion target
2123             2 $k2 Expansion mask
2124             3 $z Clear upper elements
2125             4 $xmm2 Source to expand
2126              
2127             B<Example:>
2128              
2129              
2130             my ($m, $i, $o, $p) = ( # 128
2131             #Q0 1 2
2132             #D0 1 2 3 4
2133             #W0 1 2 3 4 5 6 7 0
2134             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
2135             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
2136             '1'. '0'. '1'. '0',
2137             '11000000000000000000000000000000111111111111111111111111111111101000000000000000000000000000000011111111111111111111111111111111',
2138             '00000000000000000000000000000000111111111111111000000000000000001100000000000000000000000000000010000000000000000000000000000000',
2139             '11000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000',
2140             );
2141             $m = zBytes(7).'0000'.$m; # Zero pad mask
2142             is_deeply VPCOMPRESSD($o, $m, 0, $i), $o;
2143              
2144             is_deeply VPEXPANDD ($p, $m, 0, $o), $p; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2145              
2146              
2147              
2148             =head3 VPEXPANDQ($xmm1, $k2, $z, $xmm2)
2149              
2150             Packed EXPAND Qword
2151              
2152             Parameter Description
2153             1 $xmm1 Expansion target
2154             2 $k2 Expansion mask
2155             3 $z Clear upper elements
2156             4 $xmm2 Source to expand
2157              
2158             B<Example:>
2159              
2160              
2161             my ($m, $i, $o, $p) = ( # 128
2162             #Q0 1 2
2163             #D0 1 2 3 4
2164             #W0 1 2 3 4 5 6 7 0
2165             #B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0
2166             #b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670
2167             '1'. '0',
2168             '10000000000000000000000000000000000000000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111',
2169             '00000000000000000000000000000000111111111111111000000000000000001000000000000000000000000000000000000000000000000000000000000000',
2170             '10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
2171             );
2172             $m = zBytes(7).'000000'.$m; # Zero pad mask
2173             is_deeply VPCOMPRESSQ($o, $m, 0, $i), $o;
2174              
2175             is_deeply VPEXPANDQ ($p, $m, 0, $o), $p; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
2176              
2177              
2178              
2179              
2180             =head1 Private Methods
2181              
2182             =head2 pcmpeq($size, $xmm1, $xmm2)
2183              
2184             Packed CoMPare EQual
2185              
2186             Parameter Description
2187             1 $size Size in bits
2188             2 $xmm1 Element
2189             3 $xmm2 Element
2190              
2191             =head2 pcmpgt($size, $xmm1, $xmm2)
2192              
2193             Packed CoMPare Greater Than
2194              
2195             Parameter Description
2196             1 $size Size in bits
2197             2 $xmm1 Element
2198             3 $xmm2 Element
2199              
2200             =head2 vpcmpeq($size, $k2, $xmm1, $xmm2)
2201              
2202             Packed CoMPare EQual Byte|word|double|quad with optional masking
2203              
2204             Parameter Description
2205             1 $size Size in bits: 8|16|32|64 of each element
2206             2 $k2 Optional input mask
2207             3 $xmm1 Bytes
2208             4 $xmm2 Bytes
2209              
2210             =head2 vpcmp($size, $k2, $xmm1, $xmm2, $op)
2211              
2212             Packed CoMPare
2213              
2214             Parameter Description
2215             1 $size Size of element in bits
2216             2 $k2 Input mask
2217             3 $xmm1 Bytes
2218             4 $xmm2 Bytes
2219             5 $op Test code
2220              
2221             =head2 vpcmpu($size, $k2, $xmm1, $xmm2, $op)
2222              
2223             Packed CoMPare Unsigned
2224              
2225             Parameter Description
2226             1 $size Size of element in bits
2227             2 $k2 Input mask
2228             3 $xmm1 Bytes
2229             4 $xmm2 Bytes
2230             5 $op Test code
2231              
2232             =head2 andAndTest($a, $b)
2233              
2234             And two bit strings of the same length and return 0 if the result is 0 else 1
2235              
2236             Parameter Description
2237             1 $a Element
2238             2 $b Element
2239              
2240             =head2 vptest($size, $xmm1, $xmm2)
2241              
2242             Packed TEST
2243              
2244             Parameter Description
2245             1 $size Size of element in bits
2246             2 $xmm1 Element
2247             3 $xmm2 Element
2248              
2249             =head2 vpcompress($size, $xmm1, $k2, $z, $xmm2)
2250              
2251             Packed COMPRESS
2252              
2253             Parameter Description
2254             1 $size Size of each element in bits
2255             2 $xmm1 Compression target
2256             3 $k2 Compression mask
2257             4 $z Clear upper elements
2258             5 $xmm2 Source to compress
2259              
2260             =head2 vpexpand($size, $xmm1, $k2, $z, $xmm2)
2261              
2262             Packed EXPAND
2263              
2264             Parameter Description
2265             1 $size Size of each element in bits
2266             2 $xmm1 Expansion target
2267             3 $k2 Expansion mask
2268             4 $z Clear upper elements
2269             5 $xmm2 Source to expand
2270              
2271              
2272             =head1 Index
2273              
2274              
2275             1 L - And two bit strings of the same length and return 0 if the result is 0 else 1
2276              
2277             2 L - Packed CoMPare EQual
2278              
2279             3 L - Packed CoMPare EQual Byte
2280              
2281             4 L - Packed CoMPare EQual DWord
2282              
2283             5 L - Packed CoMPare EQual QWord
2284              
2285             6 L - Packed CoMPare EQual Word
2286              
2287             7 L - Packed CoMPare Greater Than
2288              
2289             8 L - Packed CoMPare Greater Than Byte
2290              
2291             9 L - Packed CoMPare Greater Than DWord
2292              
2293             10 L - Packed CoMPare Greater Than QWord
2294              
2295             11 L - Packed CoMPare Greater Than Word
2296              
2297             12 L - Packed Shift Left Logical DoubleQword
2298              
2299             13 L - Packed Shift Right Logical DoubleQword
2300              
2301             14 L - Packed BROADCAST Byte
2302              
2303             15 L - Packed BROADCAST Dword
2304              
2305             16 L - Packed BROADCAST Quad
2306              
2307             17 L - Packed BROADCAST Word
2308              
2309             18 L - Packed CoMPare
2310              
2311             19 L - Packed CoMPare Byte
2312              
2313             20 L - Packed CoMPare Dword
2314              
2315             21 L - Packed CoMPare EQual Byte|word|double|quad with optional masking
2316              
2317             22 L - Packed CoMPare EQual Byte with optional masking
2318              
2319             23 L - Packed CoMPare EQual Byte with optional masking
2320              
2321             24 L - Packed CoMPare EQual Byte with optional masking
2322              
2323             25 L - Packed CoMPare EQual Byte with optional masking
2324              
2325             26 L - Packed CoMPare Qword
2326              
2327             27 L - Packed CoMPare Unsigned
2328              
2329             28 L - Packed CoMPare Unsigned Byte
2330              
2331             29 L - Packed CoMPare Unsigned Dword
2332              
2333             30 L - Packed CoMPare Unsigned Qword
2334              
2335             31 L - Packed CoMPare Unsigned Word
2336              
2337             32 L - Packed CoMPare Word
2338              
2339             33 L - Packed COMPRESS
2340              
2341             34 L - Packed COMPRESS Dword
2342              
2343             35 L - Packed COMPRESS Qword
2344              
2345             36 L - Packed EXPAND
2346              
2347             37 L - Packed EXPAND Dword
2348              
2349             38 L - Packed EXPAND Qword
2350              
2351             39 L - Packed INSeRt Byte
2352              
2353             40 L - Packed INSeRt Dword
2354              
2355             41 L - Packed INSeRt Quad
2356              
2357             42 L - Packed INSeRt Word
2358              
2359             43 L - Packed Leading Zero CouNT Dword
2360              
2361             44 L - Packed Leading Zero CouNT Qword
2362              
2363             45 L - Packed Shift Left Logical DoubleQword
2364              
2365             46 L - Packed Shift Right Logical DoubleQword
2366              
2367             47 L - Packed TEST
2368              
2369             48 L - Packed TEST Mask Byte
2370              
2371             49 L - Packed TEST Mask Dword
2372              
2373             50 L - Packed TEST Mask Quad
2374              
2375             51 L - Packed TEST Mask Word
2376              
2377             =head1 Installation
2378              
2379             This module is written in 100% Pure Perl and, thus, it is easy to read,
2380             comprehend, use, modify and install via B<cpan>:
2381              
2382             sudo cpan install Simd::Avx512
2383              
2384             =head1 Author
2385              
2386             L
2387              
2388             L
2389              
2390             =head1 Copyright
2391              
2392             Copyright (c) 2016-2021 Philip R Brenan.
2393              
2394             This module is free software. It may be used, redistributed and/or modified
2395             under the same terms as Perl itself.
2396              
2397             =cut
2398              
2399              
2400              
2401             # Tests and documentation
2402              
2403             sub test # Evaluate the Perl source text held in this package's DATA section; returns 1 after a successful evaluation
2404 1     1 0 6 {my $p = __PACKAGE__; # Name of the current package, used to address its DATA file handle
2405 1         9 binmode($_, ":utf8") for *STDOUT, *STDERR; # Apply the :utf8 layer to both output handles
2406 1 50       69 return if eval "eof(${p}::DATA)"; # Nothing to do when eof() reports true for the DATA handle
2407 1         58 my $s = eval "join('', <${p}::DATA>)"; # Read everything remaining on the DATA handle into one string
2408 1 50       43 $@ and die $@; # Propagate any error raised by a preceding string eval
2409 1 100   1 0 8 eval $s; # Execute the text read from DATA as Perl source
  1 100   1   14  
  1 100   1   43  
  1 100   1   5  
  1     9   2  
  1         28  
  1         518  
  1         1658  
  1         4  
  1         855  
  1         69418  
  1         11  
  1         273  
  9         2626  
  9         16  
  9         14  
  9         21  
  9         28  
  9         27  
  9         27  
  9         29  
  9         111  
  9         48  
2410 1 50       1116 $@ and die $@; # Propagate any error raised while executing the DATA source
2411 1         152 1 # Return a true value on success
2412             }
2413              
2414             test unless caller; # Call test() only when there is no caller, i.e. not when invoked from another sub or loaded via use/require
2415              
2416             1; # True value so that loading this file via use/require succeeds
2417             # podDocumentation
2418             __DATA__