| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ |
|
2
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
3
|
|
|
|
|
|
|
# Simd::Avx512 - Emulate SIMD instructions |
|
4
|
|
|
|
|
|
|
# Philip R Brenan at appaapps dot com, Appa Apps Ltd Inc., 2021 |
|
5
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
6
|
|
|
|
|
|
|
# podDocumentation |
|
7
|
|
|
|
|
|
|
package Simd::Avx512; |
|
8
|
|
|
|
|
|
|
our $VERSION = 20210122; |
|
9
|
1
|
|
|
1
|
|
1294
|
use warnings FATAL => qw(all); |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
37
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
30
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
76
|
|
|
12
|
1
|
|
|
1
|
|
604
|
use Data::Dump qw(dump); |
|
|
1
|
|
|
|
|
7937
|
|
|
|
1
|
|
|
|
|
61
|
|
|
13
|
1
|
|
|
1
|
|
7
|
use feature qw(say current_sub); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
3232
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my $develop = -e q(/home/phil/); # Development mode: true when the path /home/phil/ exists; used to enable extra parameter checks |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub repeat($$) # Repeat a string: return $string concatenated with itself $repeat times |
|
18
|
157
|
|
|
157
|
0
|
258
|
{my ($string, $repeat) = @_; # String to repeat, number of repetitions |
|
19
|
157
|
|
|
|
|
970
|
$string x $repeat |
|
20
|
|
|
|
|
|
|
|
} |
|
21
|
|
|
|
|
|
|
|
|
22
|
63
|
|
|
63
|
0
|
104
|
sub zByte {repeat('0', 8)} # Zero byte: a string of 8 '0' characters, one character per bit |
|
23
|
0
|
|
|
0
|
0
|
0
|
sub zWord {repeat('0', 16)} # Zero word: a string of 16 '0' characters, one character per bit |
|
24
|
0
|
|
|
0
|
0
|
0
|
sub zDWord {repeat('0', 32)} # Zero double word: a string of 32 '0' characters, one character per bit |
|
25
|
31
|
|
|
31
|
0
|
60
|
sub zQWord {repeat('0', 64)} # Zero quad word: a string of 64 '0' characters, one character per bit |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub zBytes($) # String of zero bytes of specified length: 8 '0' characters for each byte requested |
|
28
|
63
|
|
|
63
|
0
|
109
|
{my ($length) = @_; # Length in bytes |
|
29
|
63
|
|
|
|
|
144
|
repeat(zByte, $length) |
|
30
|
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub zWords($) # String of zero words of specified length: 16 '0' characters for each word requested |
|
33
|
0
|
|
|
0
|
0
|
0
|
{my ($length) = @_; # Length in words |
|
34
|
0
|
|
|
|
|
0
|
repeat(zWord, $length) |
|
35
|
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub zDWords($) # String of zero double words of specified length: 32 '0' characters for each double word requested |
|
38
|
0
|
|
|
0
|
0
|
0
|
{my ($length) = @_; # Length in double words |
|
39
|
0
|
|
|
|
|
0
|
repeat(zDWord, $length) |
|
40
|
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub zQWords($) # String of zero quad words of specified length: 64 '0' characters for each quad word requested |
|
43
|
0
|
|
|
0
|
0
|
0
|
{my ($length) = @_; # Length in quad words |
|
44
|
0
|
|
|
|
|
0
|
repeat(zQWord, $length) |
|
45
|
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub byte($) # A byte with the specified value: an 8 character string of '0' and '1' with the most significant bit first |
|
48
|
16
|
|
|
16
|
0
|
26
|
{my ($value) = @_; # Value of the byte: confess unless it is at least 0 and less than 2**8 |
|
49
|
16
|
50
|
33
|
|
|
46
|
confess "0 - 2**8 required ($value)" unless $value >= 0 and $value < 2**8; |
|
50
|
16
|
|
|
|
|
54
|
sprintf("%08b", $value) |
|
51
|
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub word($) # A word with the specified value: a 16 character string of '0' and '1' with the most significant bit first |
|
54
|
0
|
|
|
0
|
0
|
0
|
{my ($value) = @_; # Value of the word: confess unless it is at least 0 and less than 2**16 |
|
55
|
0
|
0
|
0
|
|
|
0
|
confess "0 - 2**16 required ($value)" unless $value >= 0 and $value < 2**16; |
|
56
|
0
|
|
|
|
|
0
|
sprintf("%016b", $value) |
|
57
|
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub dWord($) # A double word with the specified value: a 32 character string of '0' and '1' with the most significant bit first |
|
60
|
0
|
|
|
0
|
0
|
0
|
{my ($value) = @_; # Value of the double word: confess unless it is at least 0 and less than 2**32 |
|
61
|
0
|
0
|
0
|
|
|
0
|
confess "0 - 2**32 required ($value)" unless $value >= 0 and $value < 2**32; |
|
62
|
0
|
|
|
|
|
0
|
sprintf("%032b", $value) |
|
63
|
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub qWord($) # A quad word with the specified value: a 64 character string of '0' and '1' with the most significant bit first |
|
66
|
0
|
|
|
0
|
0
|
0
|
{my ($value) = @_; # Value of the quad word: confess unless it is at least 0 and less than 2**64 |
|
67
|
0
|
0
|
0
|
|
|
0
|
confess "0 - 2**64 required ($value)" unless $value >= 0 and $value < 2**64; |
|
68
|
0
|
|
|
|
|
0
|
sprintf("%064b", $value) |
|
69
|
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
|
|
71
|
31
|
|
|
31
|
0
|
64
|
sub maskRegister {zQWord} # Mask register set to zero: a string of 64 '0' characters |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub require8or16or32or64($) # Check that we have a size of 8|16|32|64 bits; confess otherwise |
|
74
|
0
|
|
|
0
|
0
|
0
|
{my ($size) = @_; # Size to check, compared numerically |
|
75
|
0
|
0
|
0
|
|
|
0
|
confess "8|16|32|64 required for operand ($size)" unless $size == 8 or $size == 16 or $size == 32 or $size == 64; |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub require64($) # Check that we have a defined string of exactly 64 characters, each '0' or '1'; confess otherwise |
|
79
|
28
|
|
|
28
|
0
|
48
|
{my ($xmm) = @_; # Bit string to check |
|
80
|
28
|
50
|
|
|
|
58
|
defined($xmm) or confess; |
|
81
|
28
|
|
|
|
|
43
|
my $l = length $xmm; |
|
82
|
28
|
50
|
|
|
|
59
|
confess "64 bits required for operand ($l)" unless $l == 64; |
|
83
|
28
|
50
|
|
|
|
121
|
# NOTE(review): \Z also matches just before a trailing newline, so 63 bits followed by "\n" would be accepted - \z may have been intended |
confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); |
|
84
|
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub require128($) # Check that we have a string of exactly 128 characters, each '0' or '1'; confess otherwise |
|
87
|
18
|
|
|
18
|
0
|
29
|
{my ($xmm) = @_; # Bit string to check |
|
88
|
18
|
|
|
|
|
23
|
my $l = length $xmm; |
|
89
|
18
|
50
|
|
|
|
36
|
confess "128 bits required for operand ($l)" unless $l == 128; |
|
90
|
18
|
50
|
|
|
|
69
|
# NOTE(review): \Z also matches just before a trailing newline, so 127 bits followed by "\n" would be accepted - \z may have been intended |
confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); |
|
91
|
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub require128or245or512($;$) # Check that we have a string of 128|256|512 bits in the first operand and optionally the same in the second operand; only the lengths are checked, not the characters |
|
94
|
39
|
|
|
39
|
0
|
80
|
{my ($xmm1, $xmm2) = @_; # Bit string, optional second bit string |
|
95
|
39
|
|
|
|
|
63
|
my $l = length $xmm1; |
|
96
|
39
|
50
|
100
|
|
|
182
|
confess "128|256|512 bits required for first operand ($l)" unless $l == 128 or $l == 256 or $l == 512; |
|
|
|
|
66
|
|
|
|
|
|
97
|
39
|
100
|
|
|
|
105
|
if (defined $xmm2) |
|
98
|
33
|
|
|
|
|
51
|
{my $m = length $xmm2; |
|
99
|
33
|
50
|
100
|
|
|
131
|
confess "128|256|512 bits required for second operand ($m)" unless $m == 128 or $m == 256 or $m == 512; |
|
|
|
|
66
|
|
|
|
|
|
100
|
33
|
50
|
|
|
|
75
|
confess "Operands must have same length($l,$m)" unless $l == $m; |
|
101
|
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub require64or128or245or512($) # Check that we have a string of 64|128|256|512 bits made only of '0' and '1' characters; confess otherwise |
|
105
|
3
|
|
|
3
|
0
|
13
|
{my ($xmm) = @_; # Bit string to check |
|
106
|
3
|
|
|
|
|
7
|
my $l = length $xmm; |
|
107
|
3
|
0
|
33
|
|
|
8
|
confess "64|128|256|512 bits required for operand" unless $l == 64 or $l == 128 or $l == 256 or $l == 512; |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
108
|
3
|
50
|
|
|
|
16
|
# NOTE(review): \Z also matches just before a trailing newline, so a string one bit short followed by "\n" would be accepted - \z may have been intended |
confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z); |
|
109
|
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub requireSameLength($$) # Check that the two operands have the same length and return that length; confess otherwise |
|
112
|
16
|
|
|
16
|
0
|
26
|
{my ($xmm1, $xmm2) = @_; # First string, second string |
|
113
|
16
|
|
|
|
|
30
|
my ($l, $L) = (length($xmm1), length($xmm2)); |
|
114
|
16
|
50
|
|
|
|
35
|
confess "Operands have different lengths($l, $L)" unless $l == $L; |
|
115
|
16
|
|
|
|
|
32
|
$l |
|
116
|
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub flipBitsUnderMask($$) # Flip the bits in a string where the corresponding mask bit is 1 else leave the bit as is; returns the new string and confesses if the lengths differ |
|
119
|
15
|
|
|
15
|
0
|
32
|
{my ($string, $mask) = @_; # Bit string, mask of the same length |
|
120
|
15
|
|
|
|
|
30
|
my $l = requireSameLength $string, $mask; |
|
121
|
15
|
|
|
|
|
21
|
my $f = ''; |
|
122
|
15
|
|
|
|
|
36
|
for my $i(0..$l-1) # Each character in the string and mask |
|
123
|
820
|
|
|
|
|
1053
|
{my $s = substr($string, $i, 1); |
|
124
|
820
|
100
|
|
|
|
1413
|
# A mask character of '0' copies the string character unchanged, any other mask character inverts it |
$f .= substr($mask, $i, 1) eq '0' ? $s : $s eq '0' ? '1' : '0' |
|
|
|
100
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
$f |
|
127
|
15
|
|
|
|
|
89
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
#D1 Instructions # Emulation of Avx512 instructions |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub PSLLDQ($$) # Packed Shift Left Logical DoubleQword: drop the first $imm8 bytes of the 128 bit string and append $imm8 zero bytes |
|
132
|
8
|
|
|
8
|
1
|
19
|
{my ($xmm1, $imm8) = @_; # 128 bit string, length of shift in bytes |
|
133
|
8
|
|
|
|
|
19
|
require128 $xmm1; # Check that we have a string of 128 bits |
|
134
|
8
|
|
|
|
|
19
|
# NOTE(review): $imm8 is not range checked here, whereas VPSLLDQ confesses unless it is 0 - 15 - confirm whether a check is wanted |
substr($xmm1, $imm8 * 8).zBytes($imm8) |
|
135
|
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub VPSLLDQ($$) # Packed Shift Left Logical DoubleQword: apply PSLLDQ separately to each 128 bit section of a 128|256|512 bit string |
|
138
|
3
|
|
|
3
|
1
|
10
|
{my ($xmm1, $imm8) = @_; # Bit string of 128|256|512 bits, length of shift in bytes |
|
139
|
3
|
|
|
|
|
9
|
require128or245or512 $xmm1; # Check that we have a string of 128|256|512 bits |
|
140
|
3
|
50
|
33
|
|
|
15
|
confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16; |
|
141
|
|
|
|
|
|
|
|
|
|
142
|
3
|
100
|
|
|
|
8
|
return PSLLDQ($xmm1, $imm8) if length($xmm1) == 128; |
|
143
|
|
|
|
|
|
|
|
|
|
144
|
2
|
100
|
|
|
|
7
|
return PSLLDQ(substr($xmm1, 0, 128), $imm8). |
|
145
|
|
|
|
|
|
|
|
PSLLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256; |
|
146
|
|
|
|
|
|
|
|
|
|
147
|
1
|
|
|
|
|
10
|
return PSLLDQ(substr($xmm1, 0, 128), $imm8). |
|
148
|
|
|
|
|
|
|
|
PSLLDQ(substr($xmm1, 128, 128), $imm8). |
|
149
|
|
|
|
|
|
|
|
PSLLDQ(substr($xmm1, 256, 128), $imm8). |
|
150
|
|
|
|
|
|
|
|
PSLLDQ(substr($xmm1, 384, 128), $imm8) |
|
151
|
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub PSRLDQ($$) # Packed Shift Right Logical DoubleQword: prepend $imm8 zero bytes to the 128 bit string and drop its last $imm8 bytes |
|
154
|
8
|
|
|
8
|
1
|
18
|
{my ($xmm1, $imm8) = @_; # 128 bit string, length of shift in bytes |
|
155
|
8
|
|
|
|
|
19
|
require128 $xmm1; # Check that we have a string of 128 bits |
|
156
|
8
|
|
|
|
|
18
|
# NOTE(review): $imm8 is not range checked here, whereas VPSRLDQ confesses unless it is 0 - 15 - confirm whether a check is wanted |
zBytes($imm8).substr($xmm1, 0, 128 - $imm8 * 8) |
|
157
|
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub VPSRLDQ($$) # Packed Shift Right Logical DoubleQword: apply PSRLDQ separately to each 128 bit section of a 128|256|512 bit string |
|
160
|
3
|
|
|
3
|
1
|
8
|
{my ($xmm1, $imm8) = @_; # Bit string of 128|256|512 bits, length of shift in bytes |
|
161
|
3
|
|
|
|
|
8
|
require128or245or512 $xmm1; # Check that we have a string of 128|256|512 bits |
|
162
|
3
|
50
|
33
|
|
|
13
|
confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16; |
|
163
|
|
|
|
|
|
|
|
|
|
164
|
3
|
100
|
|
|
|
9
|
return PSRLDQ($xmm1, $imm8) if length($xmm1) == 128; |
|
165
|
|
|
|
|
|
|
|
|
|
166
|
2
|
100
|
|
|
|
12
|
return PSRLDQ(substr($xmm1, 0, 128), $imm8). |
|
167
|
|
|
|
|
|
|
|
PSRLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256; |
|
168
|
|
|
|
|
|
|
|
|
|
169
|
1
|
|
|
|
|
5
|
return PSRLDQ(substr($xmm1, 0, 128), $imm8). |
|
170
|
|
|
|
|
|
|
|
PSRLDQ(substr($xmm1, 128, 128), $imm8). |
|
171
|
|
|
|
|
|
|
|
PSRLDQ(substr($xmm1, 256, 128), $imm8). |
|
172
|
|
|
|
|
|
|
|
PSRLDQ(substr($xmm1, 384, 128), $imm8) |
|
173
|
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub PCMPEQB($$) # Packed CoMPare EQual Byte: each result byte is 11111111 where the corresponding bytes of the operands are equal else 00000000 |
|
176
|
1
|
|
|
1
|
1
|
5
|
{my ($xmm1, $xmm2) = @_; # 128 bit string, 128 bit string |
|
177
|
1
|
|
|
|
|
3
|
require128 $xmm1; # Check that we have a string of 128 bits in the first operand |
|
178
|
1
|
|
|
|
|
3
|
require128 $xmm2; # Check that we have a string of 128 bits in the second operand |
|
179
|
1
|
|
|
|
|
6
|
requireSameLength $xmm1, $xmm2; # Check operands have the same length |
|
180
|
1
|
|
|
|
|
1
|
my $N = 16; # Bytes in operation |
|
181
|
1
|
|
|
|
|
3
|
my $xmm3 = zBytes $N; |
|
182
|
1
|
|
|
|
|
12
|
for(0..$N-1) |
|
183
|
16
|
100
|
|
|
|
42
|
{substr($xmm3, $_*8, 8) = substr($xmm1, $_*8, 8) eq substr($xmm2, $_*8, 8) ? |
|
184
|
|
|
|
|
|
|
|
byte(255) : byte(0); |
|
185
|
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
$xmm3 |
|
187
|
1
|
|
|
|
|
18
|
} |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub vpcmpeq($$$;$) #P Packed CoMPare EQual Byte|word|double|quad with optional masking: returns a 64 bit mask if a mask is supplied else a string with each element set to all ones or all zeros |
|
190
|
5
|
|
|
5
|
1
|
12
|
{my ($size, $k2, $xmm1, $xmm2) = @_; # Size in bits: 8|16|32|64 of each element, optional input mask, bytes, bytes |
|
191
|
|
|
|
|
|
|
|
|
|
192
|
5
|
50
|
|
|
|
13
|
require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right |
|
193
|
5
|
100
|
|
|
|
16
|
require64or128or245or512 $k2 if defined $k2; # Optional mask |
|
194
|
5
|
|
|
|
|
15
|
require128or245or512 $xmm1, $xmm2; # Check that both operands are 128|256|512 bits and have the same length |
|
195
|
|
|
|
|
|
|
|
|
|
196
|
5
|
|
|
|
|
10
|
my $N = length($xmm1) / $size; # Bytes|Words|Doubles|Quads in operation |
|
197
|
5
|
100
|
|
|
|
11
|
if (defined $k2) # Masked operation |
|
198
|
3
|
|
|
|
|
20
|
{my $k1 = maskRegister; # Result register |
|
199
|
3
|
100
|
|
|
|
9
|
# NOTE(review): the mask is only right aligned for 16 and 32 elements; for other element counts the leading bits of $k2 and $k1 are used - confirm |
$k2 = substr($k2, 48) if $N == 16; # Relevant portion of register |
|
200
|
3
|
100
|
|
|
|
17
|
$k2 = substr($k2, 32) if $N == 32; |
|
201
|
3
|
|
|
|
|
13
|
for(0..$N-1) |
|
202
|
112
|
100
|
|
|
|
189
|
{next unless substr($k2, $_, 1) eq '1'; |
|
203
|
16
|
|
|
|
|
24
|
my $o = $_ * $size; |
|
204
|
16
|
100
|
|
|
|
37
|
substr($k1, $_, 1) = substr($xmm1, $o, $size) eq |
|
205
|
|
|
|
|
|
|
|
substr($xmm2, $o, $size) ? '1' : '0'; |
|
206
|
|
|
|
|
|
|
|
} |
|
207
|
3
|
100
|
|
|
|
16
|
return zBytes(6).substr($k1, 0, 16) if $N == 16; |
|
208
|
2
|
100
|
|
|
|
6
|
return zBytes(4).substr($k1, 0, 32) if $N == 32; |
|
209
|
1
|
|
|
|
|
5
|
return $k1 |
|
210
|
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
|
212
|
2
|
|
|
|
|
5
|
my $xmm3 = zBytes $N; # Non masked operation |
|
213
|
2
|
|
|
|
|
5
|
my $clear = '0' x $size; |
|
214
|
2
|
|
|
|
|
7
|
my $set = '1' x $size; |
|
215
|
2
|
|
|
|
|
8
|
for(0..$N-1) |
|
216
|
48
|
|
|
|
|
60
|
{my $o = $_ * $size; |
|
217
|
48
|
100
|
|
|
|
94
|
substr($xmm3, $o, $size) = substr($xmm1, $o, $size) eq |
|
218
|
|
|
|
|
|
|
|
substr($xmm2, $o, $size) ? $set : $clear |
|
219
|
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
|
$xmm3 |
|
221
|
2
|
|
|
|
|
9
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub VPCMPEQB($$;$) # Packed CoMPare EQual Byte with optional masking: calls vpcmpeq with an element size of 8 bits |
|
224
|
5
|
100
|
|
5
|
1
|
26
|
{my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask (only when three parameters are supplied), bytes, bytes |
|
225
|
5
|
|
|
|
|
13
|
vpcmpeq(8, $k2, $xmm1, $xmm2) |
|
226
|
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub VPCMPEQW($$;$) # Packed CoMPare EQual Word with optional masking: calls vpcmpeq with an element size of 16 bits |
|
229
|
0
|
0
|
|
0
|
1
|
0
|
{my ($k2, $xmm1, $xmm2) = @_ == 3 ? @_ : (undef, @_); # Optional input mask (only when three parameters are supplied), words, words |
|
230
|
0
|
|
|
|
|
0
|
vpcmpeq(16, $k2, $xmm1, $xmm2) |
|
231
|
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub vpcmpu($$$$$) # Packed CoMPare Unsigned byte|word|double|quad under a mask using the test selected by the test code; returns a 64 bit mask |
|
234
|
28
|
|
|
28
|
1
|
59
|
{my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code: 0 eq, 1 lt, 2 le, 4 ne, 5 ge, 6 gt |
|
235
|
|
|
|
|
|
|
|
|
|
236
|
28
|
50
|
|
|
|
68
|
require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right |
|
237
|
28
|
|
|
|
|
77
|
require64 $k2; # Mask |
|
238
|
28
|
|
|
|
|
68
|
require128or245or512 $xmm1, $xmm2; # Check that both operands are 128|256|512 bits and have the same length |
|
239
|
28
|
50
|
|
|
|
104
|
confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code |
|
240
|
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
my $T = # String tests indexed by test code; equal length bit strings compare like unsigned integers |
|
242
|
33
|
100
|
|
|
33
|
|
|
106
|
[sub {return 1 if $_[0] eq $_[1]; 0}, # eq 0 |
|
|
19
|
|
|
|
|
51
|
|
|
243
|
25
|
100
|
|
|
25
|
|
|
69
|
sub {return 1 if $_[0] lt $_[1]; 0}, # lt 1 |
|
|
18
|
|
|
|
|
42
|
|
|
244
|
25
|
100
|
|
|
25
|
|
|
79
|
sub {return 1 if $_[0] le $_[1]; 0}, # le 2 |
|
|
10
|
|
|
|
|
27
|
|
|
245
|
|
|
|
|
|
|
|
undef, |
|
246
|
33
|
100
|
|
|
33
|
|
|
102
|
sub {return 1 if $_[0] ne $_[1]; 0}, # ne 4 |
|
|
14
|
|
|
|
|
44
|
|
|
247
|
25
|
100
|
|
|
25
|
|
|
83
|
sub {return 1 if $_[0] ge $_[1]; 0}, # ge 5 |
|
|
7
|
|
|
|
|
19
|
|
|
248
|
25
|
100
|
|
|
25
|
|
|
73
|
sub {return 1 if $_[0] gt $_[1]; 0}, # gt 6 |
|
|
15
|
|
|
|
|
39
|
|
|
249
|
28
|
|
|
|
|
215
|
]; |
|
250
|
|
|
|
|
|
|
|
|
|
251
|
28
|
|
|
|
|
68
|
my $N = length($xmm1) / $size; # Number of elements |
|
252
|
28
|
|
|
|
|
52
|
my $k1 = maskRegister; |
|
253
|
28
|
|
|
|
|
75
|
$k2 = substr($k2, -$N); # Relevant portion of mask: the last $N bits |
|
254
|
28
|
|
|
|
|
74
|
for(0..$N-1) |
|
255
|
816
|
100
|
|
|
|
1406
|
{next unless substr($k2, $_, 1) eq '1'; # Mask |
|
256
|
166
|
|
|
|
|
216
|
my $o = $_ * $size; |
|
257
|
166
|
100
|
|
|
|
279
|
substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code |
|
|
166
|
|
|
|
|
265
|
|
|
258
|
|
|
|
|
|
|
|
substr($xmm2, $o, $size)) ? '1' : '0'; |
|
259
|
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
|
261
|
28
|
|
|
|
|
48
|
# Right align the $N result bits in a 64 bit mask |
substr(zBytes(8).substr($k1, 0, $N), -64) |
|
262
|
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub VPCMPUB($$$$) # Packed CoMPare Unsigned Byte: calls vpcmpu with an element size of 8 bits |
|
265
|
10
|
|
|
10
|
1
|
27
|
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code |
|
266
|
10
|
|
|
|
|
34
|
vpcmpu 8, $k2, $xmm1, $xmm2, $op |
|
267
|
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub VPCMPUW($$$$) # Packed CoMPare Unsigned Word: calls vpcmpu with an element size of 16 bits |
|
270
|
6
|
|
|
6
|
1
|
18
|
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code |
|
271
|
6
|
|
|
|
|
14
|
vpcmpu 16, $k2, $xmm1, $xmm2, $op |
|
272
|
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub VPCMPUD($$$$) # Packed CoMPare Unsigned Dword: calls vpcmpu with an element size of 32 bits |
|
275
|
6
|
|
|
6
|
1
|
16
|
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code |
|
276
|
6
|
|
|
|
|
15
|
vpcmpu 32, $k2, $xmm1, $xmm2, $op |
|
277
|
|
|
|
|
|
|
|
} |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub VPCMPUQ($$$$) # Packed CoMPare Unsigned Qword: calls vpcmpu with an element size of 64 bits |
|
280
|
6
|
|
|
6
|
1
|
16
|
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code |
|
281
|
6
|
|
|
|
|
13
|
vpcmpu 64, $k2, $xmm1, $xmm2, $op |
|
282
|
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#D0 |
|
285
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
286
|
|
|
|
|
|
|
# Export |
|
287
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
288
|
|
|
|
|
|
|
|
|
289
|
1
|
|
|
1
|
|
8
|
use Exporter qw(import); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
67
|
|
|
290
|
|
|
|
|
|
|
|
|
291
|
1
|
|
|
1
|
|
7
|
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
450
|
|
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# NOTE(review): the export list below is empty and @EXPORT is not assigned in the visible code, so the :all tag appears to export nothing - confirm this is intended |
@ISA = qw(Exporter); |
|
294
|
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
|
295
|
|
|
|
|
|
|
|
); |
|
296
|
|
|
|
|
|
|
|
%EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]); |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# podDocumentation |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=pod |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=encoding utf-8 |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 Name |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Simd::Avx512 - Emulate SIMD instructions |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head1 Synopsis |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Help needed please! |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=head1 Description |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Emulate SIMD instructions |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Version 20210122. |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
The following sections describe the methods in each functional area of this |
|
321
|
|
|
|
|
|
|
module. For an alphabetic listing of all methods by name see L<Index|/Index>. |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 Instructions |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Emulation of Avx512 instructions |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 PSLLDQ($xmm1, $imm8) |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Packed Shift Left Logical DoubleQword |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Parameter Description |
|
334
|
|
|
|
|
|
|
1 $xmm1 Bytes |
|
335
|
|
|
|
|
|
|
2 $imm8 Length of shift |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
B<Example:> |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
is_deeply PSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
344
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
345
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
346
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
347
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
348
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
349
|
|
|
|
|
|
|
,2), |
|
350
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 VPSLLDQ($xmm1, $imm8) |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Packed Shift Left Logical DoubleQword |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Parameter Description |
|
358
|
|
|
|
|
|
|
1 $xmm1 Bytes |
|
359
|
|
|
|
|
|
|
2 $imm8 Length of shift |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
B<Example:> |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
368
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
369
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
370
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
371
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
372
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
373
|
|
|
|
|
|
|
,2), |
|
374
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
380
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
381
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
382
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
383
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
384
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
385
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
386
|
|
|
|
|
|
|
,2), |
|
387
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
388
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
is_deeply VPSLLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
394
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
395
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
396
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
397
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
398
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
399
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
400
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
401
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
402
|
|
|
|
|
|
|
,2), |
|
403
|
|
|
|
|
|
|
'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
404
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
405
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
406
|
|
|
|
|
|
|
.'00000000110000000000000001000000000010000000000001100000000000000000001111110000000000000000000000000000000011110000000000000000' |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 PSRLDQ($xmm1, $imm8) |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Packed Shift Right Logical DoubleQword |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Parameter Description |
|
414
|
|
|
|
|
|
|
1 $xmm1 Bytes |
|
415
|
|
|
|
|
|
|
2 $imm8 Length of shift |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
B<Example:> |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
is_deeply PSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
424
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
425
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
426
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
427
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
428
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
429
|
|
|
|
|
|
|
,2), |
|
430
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 VPSRLDQ($xmm1, $imm8) |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Packed Shift Right Logical DoubleQword |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Parameter Description |
|
438
|
|
|
|
|
|
|
1 $xmm1 Bytes |
|
439
|
|
|
|
|
|
|
2 $imm8 Length of shift |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
B<Example:> |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
448
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
449
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
450
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
451
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
452
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
453
|
|
|
|
|
|
|
,2), |
|
454
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 2*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
460
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
461
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
462
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
463
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
464
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
465
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
466
|
|
|
|
|
|
|
,2), |
|
467
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
468
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
is_deeply VPSRLDQ( # 4*128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
474
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
475
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
476
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
477
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
478
|
|
|
|
|
|
|
'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
479
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
480
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
481
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000000100000000001000000000000110000000000000000000111111000000000000000000000000000000001111' |
|
482
|
|
|
|
|
|
|
,2), |
|
483
|
|
|
|
|
|
|
'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
484
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
485
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
486
|
|
|
|
|
|
|
.'00000000000000001110000100001000000000001100000000000000010000000000100000000000011000000000000000000011111100000000000000000000' |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head2 PCMPEQB($xmm1, $xmm2) |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
Packed CoMPare EQual Byte |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Parameter Description |
|
494
|
|
|
|
|
|
|
1 $xmm1 Bytes |
|
495
|
|
|
|
|
|
|
2 $xmm2 Bytes |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
B<Example:> |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
is_deeply PCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
504
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
505
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
506
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
507
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
508
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
|
509
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
|
510
|
|
|
|
|
|
|
), |
|
511
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 VPCMPEQB($k2, $xmm1, $xmm2) |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Packed CoMPare EQual Byte with optional masking |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Parameter Description |
|
519
|
|
|
|
|
|
|
1 $k2 Optional input mask |
|
520
|
|
|
|
|
|
|
2 $xmm1 Bytes |
|
521
|
|
|
|
|
|
|
3 $xmm2 Bytes |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
B<Example:> |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
530
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
531
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
532
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
533
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
534
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
|
535
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
|
536
|
|
|
|
|
|
|
), |
|
537
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
543
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
544
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
545
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
546
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
547
|
|
|
|
|
|
|
'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
|
548
|
|
|
|
|
|
|
.'11100001000010000000000011000000000000011000000000010000000000001100000000000000000001111110000000000000000000000000000000011110' |
|
549
|
|
|
|
|
|
|
,'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
|
550
|
|
|
|
|
|
|
.'11100001000001000000000011000000000000010000000000100000000000001100000000000000000001111110000000000000000000000000000000011010' |
|
551
|
|
|
|
|
|
|
), |
|
552
|
|
|
|
|
|
|
'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
|
553
|
|
|
|
|
|
|
.'11111111000000001111111111111111111111110000000000000000111111111111111111111111111111111111111111111111111111111111111100000000' |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 128 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
559
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
560
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
561
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
562
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
563
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
564
|
|
|
|
|
|
|
'00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
565
|
|
|
|
|
|
|
'10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
566
|
|
|
|
|
|
|
), |
|
567
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 256 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
573
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
574
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
575
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
576
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
577
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
578
|
|
|
|
|
|
|
'0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
579
|
|
|
|
|
|
|
'1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
580
|
|
|
|
|
|
|
), |
|
581
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
is_deeply VPCMPEQB( # 512 # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
587
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
588
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
589
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
590
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
591
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
592
|
|
|
|
|
|
|
'00000000110000001000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
593
|
|
|
|
|
|
|
'00000000110000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
594
|
|
|
|
|
|
|
), |
|
595
|
|
|
|
|
|
|
'1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'; |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head2 VPCMPEQW($k2, $xmm1, $xmm2) |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Packed CoMPare EQual Word with optional masking |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
Parameter Description |
|
603
|
|
|
|
|
|
|
1 $k2 Optional input mask |
|
604
|
|
|
|
|
|
|
2 $xmm1 Words |
|
605
|
|
|
|
|
|
|
3 $xmm2 Words |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head2 vpcmpu($size, $k2, $xmm1, $xmm2, $op) |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Packed CoMPare Unsigned elements of the specified size |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
Parameter Description |
|
612
|
|
|
|
|
|
|
1 $size Size of element in bits |
|
613
|
|
|
|
|
|
|
2 $k2 Input mask |
|
614
|
|
|
|
|
|
|
3 $xmm1 Bytes |
|
615
|
|
|
|
|
|
|
4 $xmm2 Bytes |
|
616
|
|
|
|
|
|
|
5 $op Test code |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head2 VPCMPUB($k2, $xmm1, $xmm2, $op) |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Packed CoMPare Unsigned Byte |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
Parameter Description |
|
623
|
|
|
|
|
|
|
1 $k2 Input mask |
|
624
|
|
|
|
|
|
|
2 $xmm1 Bytes |
|
625
|
|
|
|
|
|
|
3 $xmm2 Bytes |
|
626
|
|
|
|
|
|
|
4 $op Test code |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
B<Example:> |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
my ($mi, $mo, $o1, $o2) = ( # 128 |
|
632
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
633
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
634
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
635
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
636
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
637
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
638
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
639
|
|
|
|
|
|
|
'00000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
640
|
|
|
|
|
|
|
'10000001000010000000000011000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
641
|
|
|
|
|
|
|
); |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(6).flipBitsUnderMask substr($mo, 48), substr($mi, 48); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
my ($mi, $mo, $o1, $o2) = ( # 256 |
|
651
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
652
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
653
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
654
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
655
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
656
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
657
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
658
|
|
|
|
|
|
|
'0000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
659
|
|
|
|
|
|
|
'1000000100001000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
660
|
|
|
|
|
|
|
); |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $mo; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), zBytes(4).flipBitsUnderMask substr($mo, 32), substr($mi, 32); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
|
669
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
670
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
671
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
672
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
673
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
674
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
675
|
|
|
|
|
|
|
'1'. '1'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
676
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
677
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
678
|
|
|
|
|
|
|
'00000000110000001000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
679
|
|
|
|
|
|
|
'00000000110000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
680
|
|
|
|
|
|
|
); |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
is_deeply VPCMPUB($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 VPCMPUW($k2, $xmm1, $xmm2, $op) |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Packed CoMPare Unsigned Word |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Parameter Description |
|
706
|
|
|
|
|
|
|
1 $k2 Input mask |
|
707
|
|
|
|
|
|
|
2 $xmm1 Words |
|
708
|
|
|
|
|
|
|
3 $xmm2 Words |
|
709
|
|
|
|
|
|
|
4 $op Test code |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
B<Example:> |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
|
715
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
716
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
717
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
718
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
719
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
720
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
721
|
|
|
|
|
|
|
'1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
722
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
723
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
724
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
725
|
|
|
|
|
|
|
'00000000110000000000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', |
|
726
|
|
|
|
|
|
|
); |
|
727
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
|
728
|
|
|
|
|
|
|
{$$i = zBytes(4).$$i; |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
is_deeply VPCMPUW($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 VPCMPUD($k2, $xmm1, $xmm2, $op) |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Packed CoMPare Unsigned Dword |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Parameter Description |
|
755
|
|
|
|
|
|
|
1 $k2 Input mask |
|
756
|
|
|
|
|
|
|
2 $xmm1 Dwords |
|
757
|
|
|
|
|
|
|
3 $xmm2 Dwords |
|
758
|
|
|
|
|
|
|
4 $op Test code |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
B<Example:> |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
|
764
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
765
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
766
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
767
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
768
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
769
|
|
|
|
|
|
|
'1'. '1'. '1'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
770
|
|
|
|
|
|
|
'0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
771
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '0', |
|
772
|
|
|
|
|
|
|
'1'. '0'. '0'. '0'. '0'. '0'. '0'. '0'. '1'. '0'. '0'. '0'. '0'. '0'. '0'. '1', |
|
773
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000', |
|
774
|
|
|
|
|
|
|
'00000000110000000000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000', |
|
775
|
|
|
|
|
|
|
); |
|
776
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
|
777
|
|
|
|
|
|
|
{$$i = zBytes(6).$$i; |
|
778
|
|
|
|
|
|
|
} |
|
779
|
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
is_deeply VPCMPUD($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 VPCMPUQ($k2, $xmm1, $xmm2, $op) |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Packed CoMPare Unsigned Qword |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Parameter Description |
|
804
|
|
|
|
|
|
|
1 $k2 Input mask |
|
805
|
|
|
|
|
|
|
2 $xmm1 Qwords |
|
806
|
|
|
|
|
|
|
3 $xmm2 Qwords |
|
807
|
|
|
|
|
|
|
4 $op Test code |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
B<Example:> |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
my ($mi, $meq, $mlt, $mgt, $o1, $o2) = ( # 512 |
|
813
|
|
|
|
|
|
|
#Q0 1 2 3 4 5 6 7 8 |
|
814
|
|
|
|
|
|
|
#D0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
815
|
|
|
|
|
|
|
#W0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
816
|
|
|
|
|
|
|
#B0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 8 |
|
817
|
|
|
|
|
|
|
#b012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345670123456701234567012345678 |
|
818
|
|
|
|
|
|
|
'1'. '1'. '1'. '1'. '1'. '0'. '0'. '1', |
|
819
|
|
|
|
|
|
|
'0'. '1'. '0'. '0'. '0'. '0'. '0'. '0', |
|
820
|
|
|
|
|
|
|
'1'. '0'. '1'. '1'. '0'. '0'. '0'. '0', |
|
821
|
|
|
|
|
|
|
'0'. '0'. '0'. '0'. '1'. '0'. '0'. '1', |
|
822
|
|
|
|
|
|
|
'00000000110000001000000001100000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000000000000000110000000110000011111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000', |
|
823
|
|
|
|
|
|
|
'00000000110000001000000001100000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000011000001100000111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000', |
|
824
|
|
|
|
|
|
|
); |
|
825
|
|
|
|
|
|
|
for my $i(\($mi, $meq, $mlt, $mgt)) |
|
826
|
|
|
|
|
|
|
{$$i = zBytes(7).$$i; |
|
827
|
|
|
|
|
|
|
} |
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 0), $meq; # eq # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
830
|
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 1), $mlt; # lt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 2), flipBitsUnderMask $mgt, $mi; # le # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 4), flipBitsUnderMask $meq, $mi; # ne # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 5), flipBitsUnderMask $mlt, $mi; # ge # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
is_deeply VPCMPUQ($mi, $o1, $o2, 6), $mgt; # gt # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲 |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
=head1 Private Methods |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=head2 vpcmpeq($size, $k2, $xmm1, $xmm2) |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Packed CoMPare EQual Byte|word|double|quad with optional masking |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
Parameter Description |
|
856
|
|
|
|
|
|
|
1 $size Size in bits: 8|16|32|64 of each element |
|
857
|
|
|
|
|
|
|
2 $k2 Optional input mask |
|
858
|
|
|
|
|
|
|
3 $xmm1 Bytes |
|
859
|
|
|
|
|
|
|
4 $xmm2 Bytes |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
=head1 Index |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
1 L - Packed CoMPare EQual Byte |
|
866
|
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
2 L - Packed Shift Left Logical DoubleQword |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
3 L - Packed Shift Right Logical DoubleQword |
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
4 L<vpcmpeq|/vpcmpeq> - Packed CoMPare EQual Byte|word|double|quad with optional masking |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
5 L - Packed CoMPare EQual Byte with optional masking |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
6 L - Packed CoMPare EQual Byte with optional masking |
|
876
|
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
7 L - Packed CoMPare Unsigned Byte |
|
878
|
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
8 L - Packed CoMPare Unsigned Byte |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
9 L<VPCMPUD|/VPCMPUD> - Packed CoMPare Unsigned Dword |
|
882
|
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
10 L<VPCMPUQ|/VPCMPUQ> - Packed CoMPare Unsigned Qword |
|
884
|
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
11 L<VPCMPUW|/VPCMPUW> - Packed CoMPare Unsigned Word |
|
886
|
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
12 L - Packed Shift Left Logical DoubleQword |
|
888
|
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
13 L - Packed Shift Right Logical DoubleQword |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
=head1 Installation |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
This module is written in 100% Pure Perl and, thus, it is easy to read, |
|
894
|
|
|
|
|
|
|
comprehend, use, modify and install via B<cpan>: |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
sudo cpan install Simd::Avx512 |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
=head1 Author |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
L<philiprbrenan@gmail.com|mailto:philiprbrenan@gmail.com> |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
L<http://www.appaapps.com|http://www.appaapps.com> |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=head1 Copyright |
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
Copyright (c) 2016-2019 Philip R Brenan. |
|
907
|
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
This module is free software. It may be used, redistributed and/or modified |
|
909
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
# Tests and documentation |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub test                                                                        # Run the tests stored after __DATA__ in this package, if there are any
 {my $p = __PACKAGE__;                                                          # Package whose DATA handle holds the test source
  for my $stream(*STDOUT, *STDERR)                                              # Make both output streams accept wide characters
   {binmode($stream, ":utf8");
   }
  if (eval "eof(${p}::DATA)")                                                   # Nothing left to read from DATA so there is nothing to run
   {return;
   }
  my $s = eval "join('', <${p}::DATA>)";                                        # Slurp the remaining DATA lines into a single string
  die $@ if $@;                                                                 # Reading DATA failed
  eval $s;                                                                      # Execute the test source - it is compiled inside this scope and so can see $p and $s
  die $@ if $@;                                                                 # Pass on any failure raised by the tests
  1                                                                             # Success
 }
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
# Run the tests only when this file is executed directly: caller is false at the top level of the main program and true when the file is loaded by another file
test unless caller; |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
# True value handed back to require/use so that loading this file as a module succeeds
1; |
|
931
|
|
|
|
|
|
|
# podDocumentation |
|
932
|
|
|
|
|
|
|
__DATA__ |