| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* Copyright 1999 Dr. Brian Gladman |
|
3
|
|
|
|
|
|
|
* Copyright 2001 Abhijit Menon-Sen |
|
4
|
|
|
|
|
|
|
*/ |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
/* Twofish is a 128-bit symmetric block cipher with a variable length |
|
7
|
|
|
|
|
|
|
key, developed by Counterpane Labs. It is unpatented and free for all |
|
8
|
|
|
|
|
|
|
uses, as described at |
|
9
|
|
|
|
|
|
|
and . |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
This implementation is based on code by Dr. Brian Gladman, at |
|
12
|
|
|
|
|
|
|
. |
|
13
|
|
|
|
|
|
|
Some of his comments are reproduced below: |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
"Copyright in this implementation is held by Dr. B R Gladman but I |
|
16
|
|
|
|
|
|
|
hereby give permission for its free direct or derivative use subject |
|
17
|
|
|
|
|
|
|
to ackowledgement of its origin and compliance with any conditions |
|
18
|
|
|
|
|
|
|
that the originators of the algorithm place on its exploitation. |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
My thanks to Doug Whiting and Niels Ferguson for comments that led to |
|
21
|
|
|
|
|
|
|
improvements in this implementation." */ |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#include "twofish.h" |
|
24
|
|
|
|
|
|
|
#include "tables.h" |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
/* Extract the n'th byte from a 32-bit word */ |
|
27
|
|
|
|
|
|
|
#define byte(x,n) ((unsigned char)((x) >> (8 * n))) |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
/* 32 bit rotate-left and right macros */ |
|
30
|
|
|
|
|
|
|
#define ror(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) |
|
31
|
|
|
|
|
|
|
#define rol(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
/* Endian-independent byte -> word conversion */ |
|
34
|
|
|
|
|
|
|
#define strtonl(s) (uint32_t)(*(s)|*(s+1)<<8|*(s+2)<<16|*(s+3)<<24) |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
#define nltostr(l, s) \ |
|
37
|
|
|
|
|
|
|
do { \ |
|
38
|
|
|
|
|
|
|
*(s )=(unsigned char)((l) ); \ |
|
39
|
|
|
|
|
|
|
*(s+1)=(unsigned char)((l) >> 8); \ |
|
40
|
|
|
|
|
|
|
*(s+2)=(unsigned char)((l) >> 16); \ |
|
41
|
|
|
|
|
|
|
*(s+3)=(unsigned char)((l) >> 24); \ |
|
42
|
|
|
|
|
|
|
} while (0) |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
static uint32_t mds_rem(uint32_t a, uint32_t b); |
|
45
|
|
|
|
|
|
|
static uint32_t h(int len, const int x, unsigned char *key, int odd); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/* The key schedule takes a 128, 192, or 256-bit key, and provides 40 |
|
48
|
|
|
|
|
|
|
32-bit words of expanded key K0,...,K39 and the 4 key-dependent |
|
49
|
|
|
|
|
|
|
S-boxes used in the g function. */ |
|
50
|
|
|
|
|
|
|
|
|
51
|
20297
|
|
|
|
|
|
struct twofish *twofish_setup(unsigned char *key, int len) |
|
52
|
|
|
|
|
|
|
{ |
|
53
|
|
|
|
|
|
|
int i; |
|
54
|
|
|
|
|
|
|
uint32_t a, b, x; |
|
55
|
|
|
|
|
|
|
struct twofish *t; |
|
56
|
|
|
|
|
|
|
unsigned char *s, skey[16]; |
|
57
|
|
|
|
|
|
|
|
|
58
|
20297
|
50
|
|
|
|
|
if ((t = malloc(sizeof(struct twofish))) == NULL) |
|
59
|
0
|
|
|
|
|
|
return NULL; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
/* The key consists of k=len/8 (2, 3 or 4) 64-bit units. */ |
|
62
|
20297
|
|
|
|
|
|
t->len = len /= 8; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
/* We must derive three vectors Me, Mo, and S, each with k 32-bit |
|
65
|
|
|
|
|
|
|
words, from the 2k words in the key. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Me = (key[0], key[2], ..., key[2k-2]) (even words) |
|
68
|
|
|
|
|
|
|
Mo = (key[1], key[3], ..., key[2k-1]) (odd words) |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
The third vector is derived by multiplying each of the k groups |
|
71
|
|
|
|
|
|
|
of 8 bytes from the key by a 4x8 matrix, to get k 32-bit words. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
S = (S[k-1], S[k-2], ..., S[0]) |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
where S[i] are the 4 bytes from the multiplication, interpreted |
|
76
|
|
|
|
|
|
|
as a 32-bit word. As described later, mds_rem is equivalent to |
|
77
|
|
|
|
|
|
|
the matrix multiplication, but faster. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Since all these vectors are going to be used byte-by-byte, we |
|
80
|
|
|
|
|
|
|
avoid converting them to words altogether, and write the bytes of |
|
81
|
|
|
|
|
|
|
S into the array skey below: */ |
|
82
|
|
|
|
|
|
|
|
|
83
|
20297
|
|
|
|
|
|
s = skey + 4*(len - 1); |
|
84
|
61185
|
100
|
|
|
|
|
for (i = 0; i < len; i++) { |
|
85
|
40888
|
|
|
|
|
|
x = mds_rem(strtonl(key+8*i), strtonl(key+8*i+4)); |
|
86
|
40888
|
|
|
|
|
|
nltostr(x, s); |
|
87
|
40888
|
|
|
|
|
|
s -= 4; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
20297
|
|
|
|
|
|
s = skey; |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
/* The words of the expanded key K are defined using the h function: |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
rho = 2^24 + 2^16 + 2^8 + 2^0 (0x01010101) |
|
94
|
|
|
|
|
|
|
A[i] = h(2i*rho, Me) |
|
95
|
|
|
|
|
|
|
B[i] = ROL(h(2(i+1)*rho, Mo), 8) |
|
96
|
|
|
|
|
|
|
K[2i] = (A[i] + B[i]) mod 2^32 |
|
97
|
|
|
|
|
|
|
K[2i+1] = ROL((A[i] + 2B[i]) mod 2^32, 9) |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
rho has the property that, for i = 0..255, the word i*rho |
|
100
|
|
|
|
|
|
|
consists of four equal bytes, each with the value i. The function |
|
101
|
|
|
|
|
|
|
h is only applied to words of this type, so we only pass it the |
|
102
|
|
|
|
|
|
|
value of i. |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
We also didn't generate the vectors Me and Mo separately: we pass |
|
105
|
|
|
|
|
|
|
the entire key, and indicate whether we want the even or odd |
|
106
|
|
|
|
|
|
|
words to be used. */ |
|
107
|
|
|
|
|
|
|
|
|
108
|
426237
|
100
|
|
|
|
|
for (i = 0; i < 40; i += 2) { |
|
109
|
405940
|
|
|
|
|
|
a = h(len, i, key, 0); |
|
110
|
405940
|
|
|
|
|
|
b = rol(h(len, i+1, key, 1), 8); |
|
111
|
|
|
|
|
|
|
|
|
112
|
405940
|
|
|
|
|
|
t->K[i] = a+b; |
|
113
|
405940
|
|
|
|
|
|
t->K[i+1] = rol(a+2*b, 9); |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* The key-dependent S-boxes used in the g() function are created |
|
117
|
|
|
|
|
|
|
below. They are defined by g(X) = h(X, S), where S is the vector |
|
118
|
|
|
|
|
|
|
derived from the key. That is, for i=0..3, the S-box S[i] is |
|
119
|
|
|
|
|
|
|
formed by mapping from x[i] to y[i] in the h function. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
The relevant lookup tables qN have been precomputed and stored in |
|
122
|
|
|
|
|
|
|
tables.h; we also perform full key precomputations incorporating |
|
123
|
|
|
|
|
|
|
the MDS matrix multiplications. */ |
|
124
|
|
|
|
|
|
|
|
|
125
|
20297
|
|
|
|
|
|
switch (len) { |
|
126
|
|
|
|
|
|
|
case 2: |
|
127
|
5165957
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
|
128
|
5145856
|
|
|
|
|
|
x = (unsigned char)i; |
|
129
|
5145856
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][x]^s[4]]^s[0]]; |
|
130
|
5145856
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][x]^s[5]]^s[1]]; |
|
131
|
5145856
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][x]^s[6]]^s[2]]; |
|
132
|
5145856
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][x]^s[7]]^s[3]]; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
20101
|
|
|
|
|
|
break; |
|
135
|
|
|
|
|
|
|
case 3: |
|
136
|
25186
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
|
137
|
25088
|
|
|
|
|
|
x = (unsigned char)i; |
|
138
|
25088
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][q[1][x]^s[ 8]]^s[4]]^s[0]]; |
|
139
|
25088
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][q[1][x]^s[ 9]]^s[5]]^s[1]]; |
|
140
|
25088
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][q[0][x]^s[10]]^s[6]]^s[2]]; |
|
141
|
25088
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][q[0][x]^s[11]]^s[7]]^s[3]]; |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
} |
|
144
|
98
|
|
|
|
|
|
break; |
|
145
|
|
|
|
|
|
|
case 4: |
|
146
|
25186
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
|
147
|
25088
|
|
|
|
|
|
x = (unsigned char)i; |
|
148
|
25088
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][q[1][q[1][x]^s[12]]^s[ 8]]^s[4]]^s[0]]; |
|
149
|
25088
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][q[1][q[0][x]^s[13]]^s[ 9]]^s[5]]^s[1]]; |
|
150
|
25088
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][q[0][q[0][x]^s[14]]^s[10]]^s[6]]^s[2]]; |
|
151
|
25088
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][q[0][q[1][x]^s[15]]^s[11]]^s[7]]^s[3]]; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
98
|
|
|
|
|
|
break; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
20297
|
|
|
|
|
|
return t; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
20297
|
|
|
|
|
|
void twofish_free(struct twofish *self) |
|
160
|
|
|
|
|
|
|
{ |
|
161
|
20297
|
|
|
|
|
|
free(self); |
|
162
|
20297
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
/* The function g splits the input word x into four bytes; each byte is |
|
165
|
|
|
|
|
|
|
run through its own key-dependent S-box. Each S-box is bijective, |
|
166
|
|
|
|
|
|
|
takes 8 bits of input and produces 8 bits of output. The four results |
|
167
|
|
|
|
|
|
|
are interpreted as a vector of length 4 over GF(2^8), and multiplied |
|
168
|
|
|
|
|
|
|
by the 4x4 MDS matrix. The resulting vector is interpreted as a |
|
169
|
|
|
|
|
|
|
32-bit word. |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Since we have performed the full key precomputations, g consists only |
|
172
|
|
|
|
|
|
|
of four lookups and three XORs. g0 is g; g1 is a shortcut for |
|
173
|
|
|
|
|
|
|
g(ROL(x, 8)). */ |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
#define g0(x) \ |
|
176
|
|
|
|
|
|
|
t->S[0][byte(x,0)]^t->S[1][byte(x,1)]^t->S[2][byte(x,2)]^t->S[3][byte(x,3)] |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#define g1(x) \ |
|
179
|
|
|
|
|
|
|
t->S[0][byte(x,3)]^t->S[1][byte(x,0)]^t->S[2][byte(x,1)]^t->S[3][byte(x,2)] |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
/* F is a key-dependent permutation on 64-bit values. It takes two input |
|
182
|
|
|
|
|
|
|
words R0 and R1, and a round number r: |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
T0 = g(R0) |
|
185
|
|
|
|
|
|
|
T1 = g(ROL(R1, 8)) |
|
186
|
|
|
|
|
|
|
F0 = (T0 + T1 + K[2r+8]) |
|
187
|
|
|
|
|
|
|
F1 = (T0 + 2*T1 + K[2r+9]) |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Each of the 16 encryption rounds consists of the following operations: |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
(F0, F1) = F(R0, R1, r) |
|
192
|
|
|
|
|
|
|
R0 = ROR(R2 ^ F0, 1) |
|
193
|
|
|
|
|
|
|
R1 = ROL(R3, 1) ^ F1 |
|
194
|
|
|
|
|
|
|
R2 = R0 |
|
195
|
|
|
|
|
|
|
R3 = R1 |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
For efficiency, two rounds are combined into one in the macros below. */ |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
#define f_2rounds(i) \ |
|
200
|
|
|
|
|
|
|
t0 = g0(R[0]); \ |
|
201
|
|
|
|
|
|
|
t1 = g1(R[1]); \ |
|
202
|
|
|
|
|
|
|
R[2] = ror(R[2] ^ (t0 + t1 + t->K[4*i+8]), 1); \ |
|
203
|
|
|
|
|
|
|
R[3] = rol(R[3], 1) ^ (t0 + 2*t1 + t->K[4*i+9]); \ |
|
204
|
|
|
|
|
|
|
t0 = g0(R[2]); \ |
|
205
|
|
|
|
|
|
|
t1 = g1(R[3]); \ |
|
206
|
|
|
|
|
|
|
R[0] = ror(R[0] ^ (t0 + t1 + t->K[4*i+10]), 1); \ |
|
207
|
|
|
|
|
|
|
R[1] = rol(R[1], 1) ^ (t0 + 2*t1 + t->K[4*i+11]); |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
/* This is the inverse of f_2rounds */ |
|
210
|
|
|
|
|
|
|
#define i_2rounds(i) \ |
|
211
|
|
|
|
|
|
|
t0 = g0(R[0]); \ |
|
212
|
|
|
|
|
|
|
t1 = g1(R[1]); \ |
|
213
|
|
|
|
|
|
|
R[2] = rol(R[2], 1) ^ (t0 + t1 + t->K[4*i+10]); \ |
|
214
|
|
|
|
|
|
|
R[3] = ror(R[3] ^ (t0 + 2*t1 + t->K[4*i+11]), 1); \ |
|
215
|
|
|
|
|
|
|
t0 = g0(R[2]); \ |
|
216
|
|
|
|
|
|
|
t1 = g1(R[3]); \ |
|
217
|
|
|
|
|
|
|
R[0] = rol(R[0], 1) ^ (t0 + t1 + t->K[4*i+8]); \ |
|
218
|
|
|
|
|
|
|
R[1] = ror(R[1] ^ (t0 + 2*t1 + t->K[4*i+9]), 1) |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
/* This function encrypts or decrypts 16 bytes of input data and writes |
|
221
|
|
|
|
|
|
|
it to output, using the key defined in t. */ |
|
222
|
|
|
|
|
|
|
|
|
223
|
40296
|
|
|
|
|
|
void twofish_crypt(struct twofish *t, |
|
224
|
|
|
|
|
|
|
unsigned char *input, unsigned char *output, |
|
225
|
|
|
|
|
|
|
int decrypt) |
|
226
|
|
|
|
|
|
|
{ |
|
227
|
|
|
|
|
|
|
uint32_t t0, t1, R[4], out[4]; |
|
228
|
|
|
|
|
|
|
|
|
229
|
40296
|
100
|
|
|
|
|
if (!decrypt) { |
|
230
|
|
|
|
|
|
|
/* Whiten four 32-bit input words. */ |
|
231
|
20148
|
|
|
|
|
|
R[0] = t->K[0] ^ strtonl(input); |
|
232
|
20148
|
|
|
|
|
|
R[1] = t->K[1] ^ strtonl(input+4); |
|
233
|
20148
|
|
|
|
|
|
R[2] = t->K[2] ^ strtonl(input+8); |
|
234
|
20148
|
|
|
|
|
|
R[3] = t->K[3] ^ strtonl(input+12); |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
/* 16 rounds of encryption, combined into 8 pairs. */ |
|
237
|
20148
|
|
|
|
|
|
f_2rounds(0); f_2rounds(1); f_2rounds(2); f_2rounds(3); |
|
238
|
20148
|
|
|
|
|
|
f_2rounds(4); f_2rounds(5); f_2rounds(6); f_2rounds(7); |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
/* Output whitening; The order of R[n] undoes the last swap. */ |
|
241
|
20148
|
|
|
|
|
|
out[0] = t->K[4] ^ R[2]; |
|
242
|
20148
|
|
|
|
|
|
out[1] = t->K[5] ^ R[3]; |
|
243
|
20148
|
|
|
|
|
|
out[2] = t->K[6] ^ R[0]; |
|
244
|
20148
|
|
|
|
|
|
out[3] = t->K[7] ^ R[1]; |
|
245
|
|
|
|
|
|
|
} else { |
|
246
|
20148
|
|
|
|
|
|
R[0] = t->K[4] ^ strtonl(input); |
|
247
|
20148
|
|
|
|
|
|
R[1] = t->K[5] ^ strtonl(input+4); |
|
248
|
20148
|
|
|
|
|
|
R[2] = t->K[6] ^ strtonl(input+8); |
|
249
|
20148
|
|
|
|
|
|
R[3] = t->K[7] ^ strtonl(input+12); |
|
250
|
|
|
|
|
|
|
|
|
251
|
20148
|
|
|
|
|
|
i_2rounds(7); i_2rounds(6); i_2rounds(5); i_2rounds(4); |
|
252
|
20148
|
|
|
|
|
|
i_2rounds(3); i_2rounds(2); i_2rounds(1); i_2rounds(0); |
|
253
|
|
|
|
|
|
|
|
|
254
|
20148
|
|
|
|
|
|
out[0] = t->K[0] ^ R[2]; |
|
255
|
20148
|
|
|
|
|
|
out[1] = t->K[1] ^ R[3]; |
|
256
|
20148
|
|
|
|
|
|
out[2] = t->K[2] ^ R[0]; |
|
257
|
20148
|
|
|
|
|
|
out[3] = t->K[3] ^ R[1]; |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
/* Write 16 output bytes. */ |
|
261
|
40296
|
|
|
|
|
|
nltostr(out[0], output); |
|
262
|
40296
|
|
|
|
|
|
nltostr(out[1], output+4); |
|
263
|
40296
|
|
|
|
|
|
nltostr(out[2], output+8); |
|
264
|
40296
|
|
|
|
|
|
nltostr(out[3], output+12); |
|
265
|
40296
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
/* h takes a 32-bit word X, and a list, L = (L[0],...,L[k-1]), of 32-bit |
|
268
|
|
|
|
|
|
|
words, and produces one word of output. During each of the k stages |
|
269
|
|
|
|
|
|
|
of the function, the four bytes from X are each passed through a |
|
270
|
|
|
|
|
|
|
fixed S-box, and XORed with a byte derived from the list. Finally, |
|
271
|
|
|
|
|
|
|
the bytes are once again passed through an S-box and multiplied by |
|
272
|
|
|
|
|
|
|
the MDS matrix, just as in g. |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
We use the Lbyte macro to extract a given byte from the list L |
|
275
|
|
|
|
|
|
|
(expressed in little endian). */ |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
#define Lbyte(w, b) L[4*(2*w+odd)+b] |
|
278
|
|
|
|
|
|
|
|
|
279
|
1217820
|
|
|
|
|
|
static uint32_t h(int len, const int X, unsigned char *L, int odd) |
|
280
|
|
|
|
|
|
|
{ |
|
281
|
|
|
|
|
|
|
unsigned char b0, b1, b2, b3; |
|
282
|
|
|
|
|
|
|
|
|
283
|
1217820
|
|
|
|
|
|
b0 = b1 = b2 = b3 = (unsigned char)X; |
|
284
|
|
|
|
|
|
|
|
|
285
|
1217820
|
|
|
|
|
|
switch (len) { |
|
286
|
|
|
|
|
|
|
case 4: |
|
287
|
5880
|
|
|
|
|
|
b0 = q[1][b0] ^ Lbyte(3, 0); |
|
288
|
5880
|
|
|
|
|
|
b1 = q[0][b1] ^ Lbyte(3, 1); |
|
289
|
5880
|
|
|
|
|
|
b2 = q[0][b2] ^ Lbyte(3, 2); |
|
290
|
5880
|
|
|
|
|
|
b3 = q[1][b3] ^ Lbyte(3, 3); |
|
291
|
|
|
|
|
|
|
case 3: |
|
292
|
11760
|
|
|
|
|
|
b0 = q[1][b0] ^ Lbyte(2, 0); |
|
293
|
11760
|
|
|
|
|
|
b1 = q[1][b1] ^ Lbyte(2, 1); |
|
294
|
11760
|
|
|
|
|
|
b2 = q[0][b2] ^ Lbyte(2, 2); |
|
295
|
11760
|
|
|
|
|
|
b3 = q[0][b3] ^ Lbyte(2, 3); |
|
296
|
|
|
|
|
|
|
case 2: |
|
297
|
1217820
|
|
|
|
|
|
b0 = q[0][q[0][b0] ^ Lbyte(1, 0)] ^ Lbyte(0, 0); |
|
298
|
1217820
|
|
|
|
|
|
b1 = q[0][q[1][b1] ^ Lbyte(1, 1)] ^ Lbyte(0, 1); |
|
299
|
1217820
|
|
|
|
|
|
b2 = q[1][q[0][b2] ^ Lbyte(1, 2)] ^ Lbyte(0, 2); |
|
300
|
1217820
|
|
|
|
|
|
b3 = q[1][q[1][b3] ^ Lbyte(1, 3)] ^ Lbyte(0, 3); |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
1217820
|
|
|
|
|
|
return m[0][b0] ^ m[1][b1] ^ m[2][b2] ^ m[3][b3]; |
|
304
|
|
|
|
|
|
|
} |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
/* The (12, 8) Reed Solomon code has the generator polynomial: |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1 |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
where the coefficients are in the finite field GF(2^8) with a modular |
|
311
|
|
|
|
|
|
|
polynomial a^8+a^6+a^3+a^2+1. To generate the remainder, we have to |
|
312
|
|
|
|
|
|
|
start with a 12th order polynomial with our eight input bytes as the |
|
313
|
|
|
|
|
|
|
coefficients of the 4th to 11th terms: |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
We then multiply the generator polynomial by m[7]*x^7 and subtract it |
|
318
|
|
|
|
|
|
|
(XOR in GF(2^8)) from the above to eliminate the x^7 term (the |
|
319
|
|
|
|
|
|
|
arithmetic on the coefficients is done in GF(2^8)). We then multiply |
|
320
|
|
|
|
|
|
|
the generator polynomial by m[6]*x^6 and use this to remove the x^10 |
|
321
|
|
|
|
|
|
|
term, and so on until the x^4 term is removed, and we are left with: |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
which give the resulting 4 bytes of the remainder. This is equivalent |
|
326
|
|
|
|
|
|
|
to the matrix multiplication described in the Twofish paper, but is |
|
327
|
|
|
|
|
|
|
much faster. */ |
|
328
|
|
|
|
|
|
|
|
|
329
|
40888
|
|
|
|
|
|
static uint32_t mds_rem(uint32_t a, uint32_t b) |
|
330
|
|
|
|
|
|
|
{ |
|
331
|
|
|
|
|
|
|
int i; |
|
332
|
|
|
|
|
|
|
uint32_t t, u; |
|
333
|
|
|
|
|
|
|
enum { G_MOD = 0x0000014d }; |
|
334
|
|
|
|
|
|
|
|
|
335
|
367992
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) { |
|
336
|
|
|
|
|
|
|
/* Get most significant coefficient */ |
|
337
|
327104
|
|
|
|
|
|
t = b >> 24; |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
/* Shift the others up */ |
|
340
|
327104
|
|
|
|
|
|
b = (b << 8) | (a >> 24); |
|
341
|
327104
|
|
|
|
|
|
a <<= 8; |
|
342
|
|
|
|
|
|
|
|
|
343
|
327104
|
|
|
|
|
|
u = t << 1; |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
/* Subtract the modular polynomial on overflow */ |
|
346
|
327104
|
100
|
|
|
|
|
if (t & 0x80) |
|
347
|
263436
|
|
|
|
|
|
u ^= G_MOD; |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
/* Remove t * (a * x^2 + 1) */ |
|
350
|
327104
|
|
|
|
|
|
b ^= t ^ (u << 16); |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
/* Form u = a*t + t/a = t*(a + 1/a) */ |
|
353
|
327104
|
|
|
|
|
|
u ^= t >> 1; |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
/* Add the modular polynomial on underflow */ |
|
356
|
327104
|
100
|
|
|
|
|
if (t & 0x01) |
|
357
|
223293
|
|
|
|
|
|
u ^= G_MOD >> 1; |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
/* Remove t * (a + 1/a) * (x^3 + x) */ |
|
360
|
327104
|
|
|
|
|
|
b ^= (u << 24) | (u << 8); |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
40888
|
|
|
|
|
|
return b; |
|
364
|
|
|
|
|
|
|
} |