| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/** |
|
2
|
|
|
|
|
|
|
* @file pstm_sqr_comba.c |
|
3
|
|
|
|
|
|
|
* @version 950bba4 (HEAD -> master) |
|
4
|
|
|
|
|
|
|
* |
|
5
|
|
|
|
|
|
|
* Multiprecision Squaring with Comba technique. |
|
6
|
|
|
|
|
|
|
*/ |
|
7
|
|
|
|
|
|
|
/* |
|
8
|
|
|
|
|
|
|
* Copyright (c) 2013-2017 INSIDE Secure Corporation |
|
9
|
|
|
|
|
|
|
* Copyright (c) PeerSec Networks, 2002-2011 |
|
10
|
|
|
|
|
|
|
* All Rights Reserved |
|
11
|
|
|
|
|
|
|
* |
|
12
|
|
|
|
|
|
|
* The latest version of this code is available at http://www.matrixssl.org |
|
13
|
|
|
|
|
|
|
* |
|
14
|
|
|
|
|
|
|
* This software is open source; you can redistribute it and/or modify |
|
15
|
|
|
|
|
|
|
* it under the terms of the GNU General Public License as published by |
|
16
|
|
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
17
|
|
|
|
|
|
|
* (at your option) any later version. |
|
18
|
|
|
|
|
|
|
* |
|
19
|
|
|
|
|
|
|
* This General Public License does NOT permit incorporating this software |
|
20
|
|
|
|
|
|
|
* into proprietary programs. If you are unable to comply with the GPL, a |
|
21
|
|
|
|
|
|
|
* commercial license for this software may be purchased from INSIDE at |
|
22
|
|
|
|
|
|
|
* http://www.insidesecure.com/ |
|
23
|
|
|
|
|
|
|
* |
|
24
|
|
|
|
|
|
|
* This program is distributed in WITHOUT ANY WARRANTY; without even the |
|
25
|
|
|
|
|
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
26
|
|
|
|
|
|
|
* See the GNU General Public License for more details. |
|
27
|
|
|
|
|
|
|
* |
|
28
|
|
|
|
|
|
|
* You should have received a copy of the GNU General Public License |
|
29
|
|
|
|
|
|
|
* along with this program; if not, write to the Free Software |
|
30
|
|
|
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
31
|
|
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
|
32
|
|
|
|
|
|
|
*/ |
|
33
|
|
|
|
|
|
|
/******************************************************************************/ |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#include "../cryptoImpl.h" |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#if defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) || defined(USE_MATRIX_DH) |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
/******************************************************************************/ |
|
40
|
|
|
|
|
|
|
# if defined(PSTM_X86) |
|
41
|
|
|
|
|
|
|
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ |
|
42
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__i386__) |
|
43
|
|
|
|
|
|
|
# error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" |
|
44
|
|
|
|
|
|
|
# endif |
|
45
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit x86 Assembly Optimizations") */ |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/*
    Comba column-accumulator helpers for the 32-bit x86 inline-asm build.

    The macros operate on variables that must already exist in the calling
    scope: the three-digit column accumulator c2:c1:c0 (c0 is the least
    significant digit) and, for the SC/AC/DB family, the secondary
    accumulator sc2:sc1:sc0.

    Every statement macro below already ends in ';', so the customary
    "MACRO(...);" call form leaves an extra empty statement behind.
 */

/* Expands to nothing: no per-function setup is needed. */
# define COMBA_START

/* Zero the column accumulator c2:c1:c0. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit c1 into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move on to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing: no teardown is needed. */
# define COMBA_FINI

/*
    c2:c1:c0 += i * i.
    Only i is referenced by the asm (operand %6, constraint "m", so it must
    be a memory operand); j is ignored.  eax, edx and the flags are listed
    as clobbered.
 */
# define SQRADD(i, j) \
    asm ( \
    "movl %6,%%eax \n\t" \
    "mull %%eax \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "m" (i) : "%eax", "%edx", "cc");

/*
    c2:c1:c0 += 2 * (i * j).
    The product is computed once (edx:eax) and added to the accumulator
    twice.  Both i and j use the "m" constraint.
 */
# define SQRADD2(i, j) \
    asm ( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "m" (i), "m" (j) : "%eax", "%edx", "cc");

/*
    Start a secondary sum: sc1:sc0 = i * j and sc2 = 0.  The previous
    contents of sc0/sc1/sc2 are overwritten.
 */
# define SQRADDSC(i, j) \
    asm ( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "movl %%eax,%0 \n\t" \
    "movl %%edx,%1 \n\t" \
    "xorl %2,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "g" (j) : "%eax", "%edx", "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    asm ( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "g" (j) : "%eax", "%edx", "cc");

/*
    Fold the doubled secondary sum into the column accumulator:
    c2:c1:c0 += 2 * sc2:sc1:sc0 (the three-digit add is performed twice).
 */
# define SQRADDDB \
    asm ( \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (sc0), "r" (sc1), "r" (sc2) : "cc");
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/******************************************************************************/ |
|
113
|
|
|
|
|
|
|
# elif defined(PSTM_X86_64) |
|
114
|
|
|
|
|
|
|
/* x86-64 optimized */ |
|
115
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) |
|
116
|
|
|
|
|
|
|
# error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" |
|
117
|
|
|
|
|
|
|
# endif |
|
118
|
|
|
|
|
|
|
/* #pragma message ("Using 64 bit x86_64 Assembly Optimizations") */ |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
/*
    Comba column-accumulator helpers for the x86-64 inline-asm build.

    The macros operate on variables that must already exist in the calling
    scope: the three-digit column accumulator c2:c1:c0 (c0 is the least
    significant digit) and, for the SC/AC/DB family, the secondary
    accumulator sc2:sc1:sc0.

    Every statement macro below already ends in ';', so the customary
    "MACRO(...);" call form leaves an extra empty statement behind.
 */

/* Expands to nothing: no per-function setup is needed. */
# define COMBA_START

/* Zero the column accumulator c2:c1:c0. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit c1 into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move on to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing: no teardown is needed. */
# define COMBA_FINI

/*
    c2:c1:c0 += i * i.
    Only i is referenced by the asm (operand %6); j is ignored.  rax, rdx
    and the flags are listed as clobbered.
 */
# define SQRADD(i, j) \
    asm ( \
    "movq %6,%%rax \n\t" \
    "mulq %%rax \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "g" (i) : "%rax", "%rdx", "cc");

/*
    c2:c1:c0 += 2 * (i * j).
    The product is computed once (rdx:rax) and added to the accumulator
    twice.
 */
# define SQRADD2(i, j) \
    asm ( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "g" (i), "g" (j) : "%rax", "%rdx", "cc");

/*
    Start a secondary sum: sc1:sc0 = i * j and sc2 = 0.  The previous
    contents of sc0/sc1/sc2 are overwritten.
 */
# define SQRADDSC(i, j) \
    asm ( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "movq %%rax,%0 \n\t" \
    "movq %%rdx,%1 \n\t" \
    "xorq %2,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "g" (j) : "%rax", "%rdx", "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    asm ( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "g" (j) : "%rax", "%rdx", "cc");

/*
    Fold the doubled secondary sum into the column accumulator:
    c2:c1:c0 += 2 * sc2:sc1:sc0 (the three-digit add is performed twice).
 */
# define SQRADDDB \
    asm ( \
    "addq %6,%0 \n\t" \
    "adcq %7,%1 \n\t" \
    "adcq %8,%2 \n\t" \
    "addq %6,%0 \n\t" \
    "adcq %7,%1 \n\t" \
    "adcq %8,%2 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (sc0), "r" (sc1), "r" (sc2) : "cc");
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
/******************************************************************************/ |
|
186
|
|
|
|
|
|
|
# elif defined(PSTM_ARM) |
|
187
|
|
|
|
|
|
|
/* ARM code */ |
|
188
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit ARM Assembly Optimizations") */ |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
/*
    Comba column-accumulator helpers for the 32-bit ARM inline-asm build.

    The macros operate on variables that must already exist in the calling
    scope: the three-digit column accumulator c2:c1:c0 (c0 is the least
    significant digit) and, for the SC/AC/DB family, the secondary
    accumulator sc2:sc1:sc0.

    Every statement macro below already ends in ';', so the customary
    "MACRO(...);" call form leaves an extra empty statement behind.
 */

/* Expands to nothing: no per-function setup is needed. */
# define COMBA_START

/* Zero the column accumulator c2:c1:c0. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit c1 into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move on to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing: no teardown is needed. */
# define COMBA_FINI

/*
    c2:c1:c0 += i * i.
    Only i is referenced by the asm (operand %6); j is ignored.  The
    product is formed in the hard-coded registers r0 (low) and r1 (high),
    which are listed as clobbered together with the flags.
 */
# define SQRADD(i, j) \
    asm ( \
    " UMULL r0,r1,%6,%6 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i) : "r0", "r1", "cc");

/*
    c2:c1:c0 += 2 * (i * j).
    For squaring, the off-diagonal products appear twice: the product is
    computed once (r1:r0) and added to the accumulator twice.
 */
# define SQRADD2(i, j) \
    asm ( \
    " UMULL r0,r1,%6,%7 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i), "r" (j) : "r0", "r1", "cc");

/*
    Start a secondary sum: sc1:sc0 = i * j (UMULL writes the output
    operands directly) and sc2 = 0 (sc2 - sc2).  The previous contents of
    sc0/sc1/sc2 are overwritten.
 */
# define SQRADDSC(i, j) \
    asm ( \
    " UMULL %0,%1,%6,%7 \n\t" \
    " SUB %2,%2,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    asm ( \
    " UMULL r0,r1,%6,%7 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "r0", "r1", "cc");

/*
    Fold the doubled secondary sum into the column accumulator:
    c2:c1:c0 += 2 * sc2:sc1:sc0 (the three-digit add is performed twice).
    Note the operand order differs from the x86 variants: here %3..%5 are
    sc0..sc2 and the tied inputs c0..c2 come last.
 */
# define SQRADDDB \
    asm ( \
    " ADDS %0,%0,%3 \n\t" \
    " ADCS %1,%1,%4 \n\t" \
    " ADC %2,%2,%5 \n\t" \
    " ADDS %0,%0,%3 \n\t" \
    " ADCS %1,%1,%4 \n\t" \
    " ADC %2,%2,%5 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "r" (sc0), "r" (sc1), "r" (sc2), "0" (c0), "1" (c1), "2" (c2) : "cc");
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
/******************************************************************************/ |
|
252
|
|
|
|
|
|
|
# elif defined(PSTM_MIPS) |
|
253
|
|
|
|
|
|
|
/* MIPS32 */ |
|
254
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit MIPS Assembly Optimizations") */ |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
/*
    Comba column-accumulator helpers for the MIPS32 inline-asm build.

    The macros operate on variables that must already exist in the calling
    scope: the three-digit column accumulator c2:c1:c0 (c0 is the least
    significant digit) and, for the SC/AC/DB family, the secondary
    accumulator sc2:sc1:sc0.

    There is no add-with-carry instruction in these sequences; each carry
    is recovered with "sltu" (result < addend means the add wrapped) and
    then added into the next digit.

    Every statement macro below already ends in ';', so the customary
    "MACRO(...);" call form leaves an extra empty statement behind.

    NOTE(review): multu/mflo/mfhi use the HI/LO registers, but none of the
    clobber lists below name them - confirm whether the targeted compilers
    require "hi"/"lo" clobbers here.
 */

/* Expands to nothing: no per-function setup is needed. */
# define COMBA_START

/* Zero the column accumulator c2:c1:c0. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit c1 into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move on to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing: no teardown is needed. */
# define COMBA_FINI

/*
    c2:c1:c0 += i * i.
    Only i is referenced by the asm (operand %6); j is ignored.  $12 and
    $13 hold the product halves and are then reused as carry flags; both
    are listed as clobbered.
 */
# define SQRADD(i, j) \
    asm ( \
    " multu %6,%6 \n\t" \
    " mflo $12 \n\t" \
    " mfhi $13 \n\t" \
    " addu %0,%0,$12 \n\t" \
    " sltu $12,%0,$12 \n\t" \
    " addu %1,%1,$13 \n\t" \
    " sltu $13,%1,$13 \n\t" \
    " addu %1,%1,$12 \n\t" \
    " sltu $12,%1,$12 \n\t" \
    " addu %2,%2,$13 \n\t" \
    " addu %2,%2,$12 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i) : "$12", "$13");

/*
    c2:c1:c0 += 2 * (i * j).
    For squaring, the off-diagonal products appear twice: the product is
    kept in $13:$12 and added to the accumulator twice, with $14/$15 used
    for the carries.
 */
# define SQRADD2(i, j) \
    asm ( \
    " multu %6,%7 \n\t" \
    " mflo $12 \n\t" \
    " mfhi $13 \n\t" \
    \
    " addu %0,%0,$12 \n\t" \
    " sltu $14,%0,$12 \n\t" \
    " addu %1,%1,$13 \n\t" \
    " sltu $15,%1,$13 \n\t" \
    " addu %1,%1,$14 \n\t" \
    " sltu $14,%1,$14 \n\t" \
    " addu %2,%2,$15 \n\t" \
    " addu %2,%2,$14 \n\t" \
    \
    " addu %0,%0,$12 \n\t" \
    " sltu $14,%0,$12 \n\t" \
    " addu %1,%1,$13 \n\t" \
    " sltu $15,%1,$13 \n\t" \
    " addu %1,%1,$14 \n\t" \
    " sltu $14,%1,$14 \n\t" \
    " addu %2,%2,$15 \n\t" \
    " addu %2,%2,$14 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i), "r" (j) : "$12", "$13", "$14", "$15");

/*
    Start a secondary sum: sc1:sc0 = i * j and sc2 = 0 (sc2 xor sc2).  The
    previous contents of sc0/sc1/sc2 are overwritten.
    NOTE(review): this is the only MIPS macro here that lists "cc" as a
    clobber - confirm whether that is intentional.
 */
# define SQRADDSC(i, j) \
    asm ( \
    " multu %6,%7 \n\t" \
    " mflo %0 \n\t" \
    " mfhi %1 \n\t" \
    " xor %2,%2,%2 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "cc");

/*
    Accumulate into the secondary sum: sc2:sc1:sc0 += i * j.
    NOTE(review): "$14" is listed as clobbered but is not referenced by the
    instruction sequence - looks like a leftover; confirm before removing.
 */
# define SQRADDAC(i, j) \
    asm ( \
    " multu %6,%7 \n\t" \
    " mflo $12 \n\t" \
    " mfhi $13 \n\t" \
    " addu %0,%0,$12 \n\t" \
    " sltu $12,%0,$12 \n\t" \
    " addu %1,%1,$13 \n\t" \
    " sltu $13,%1,$13 \n\t" \
    " addu %1,%1,$12 \n\t" \
    " sltu $12,%1,$12 \n\t" \
    " addu %2,%2,$13 \n\t" \
    " addu %2,%2,$12 \n\t" \
    : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "$12", "$13", "$14");

/*
    Fold the doubled secondary sum into the column accumulator:
    c2:c1:c0 += 2 * sc2:sc1:sc0 (the three-digit add is performed twice).
    Here %3..%5 are sc0..sc2; $10 and $11 carry between digits and are
    listed as clobbered.
 */
# define SQRADDDB \
    asm ( \
    " addu %0,%0,%3 \n\t" \
    " sltu $10,%0,%3 \n\t" \
    " addu %1,%1,$10 \n\t" \
    " sltu $10,%1,$10 \n\t" \
    " addu %1,%1,%4 \n\t" \
    " sltu $11,%1,%4 \n\t" \
    " addu %2,%2,$10 \n\t" \
    " addu %2,%2,$11 \n\t" \
    " addu %2,%2,%5 \n\t" \
    \
    " addu %0,%0,%3 \n\t" \
    " sltu $10,%0,%3 \n\t" \
    " addu %1,%1,$10 \n\t" \
    " sltu $10,%1,$10 \n\t" \
    " addu %1,%1,%4 \n\t" \
    " sltu $11,%1,%4 \n\t" \
    " addu %2,%2,$10 \n\t" \
    " addu %2,%2,$11 \n\t" \
    " addu %2,%2,%5 \n\t" \
    : "=r" (c0), "=r" (c1), "=r" (c2) : "r" (sc0), "r" (sc1), "r" (sc2), "0" (c0), "1" (c1), "2" (c2) : "$10", "$11");
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# else |
|
361
|
|
|
|
|
|
|
/******************************************************************************/ |
|
362
|
|
|
|
|
|
|
# define PSTM_ISO |
|
363
|
|
|
|
|
|
|
/* ISO C portable code */ |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
/*
    Comba column-accumulator helpers, portable ISO C version.

    The macros operate on variables that must already exist in the calling
    scope:
      - c0, c1, c2    (pstm_digit) three-digit column accumulator, c0 is the
                      least significant digit;
      - sc0, sc1, sc2 (pstm_digit) secondary accumulator used by the
                      SQRADDSC / SQRADDAC / SQRADDDB family;
      - tt            (pstm_word) scratch variable required by SQRADD2.
    pstm_word must be able to hold the product of two pstm_digit values, and
    DIGIT_BIT is the shift that moves the high digit of a pstm_word down.

    Macro parameters are parenthesized so that an argument which is a compound
    expression (e.g. "cond ? a : b") is converted to pstm_word as a whole
    before the multiplication.  Each argument is evaluated exactly once, so
    arguments with side effects such as "*tmpx++" are safe.

    Every statement macro below still ends in ';' (kept for compatibility with
    existing call sites), so "MACRO(...);" leaves an extra empty statement.
 */

/* Expands to nothing: no per-function setup is needed. */
# define COMBA_START

/* Zero the column accumulator c2:c1:c0. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
# define COMBA_STORE(x) \
    (x) = c0;

/* Store the middle accumulator digit c1 into x. */
# define COMBA_STORE2(x) \
    (x) = c1;

/* Move on to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing: no teardown is needed. */
# define COMBA_FINI

/* c2:c1:c0 += i * j (callers pass the same digit twice to add a square). */
# define SQRADD(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) c0) + ((pstm_word) (i)) * ((pstm_word) (j)); \
         c0 = (pstm_digit) t; \
         t = ((pstm_word) c1) + (t >> DIGIT_BIT); \
         c1 = (pstm_digit) t; c2 += (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);


/*
    c2:c1:c0 += 2 * (i * j).
    For squaring, the off-diagonal products appear twice: the product is
    computed once and added to the accumulator twice.  Uses the caller's
    "tt" scratch variable.
 */
# define SQRADD2(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) (i)) * ((pstm_word) (j)); \
         tt = (pstm_word) c0 + t; c0 = (pstm_digit) tt; \
         tt = (pstm_word) c1 + (tt >> DIGIT_BIT); \
         c1 = (pstm_digit) tt; c2 += (pstm_digit) (tt >> DIGIT_BIT); \
         tt = (pstm_word) c0 + t; c0 = (pstm_digit) tt; \
         tt = (pstm_word) c1 + (tt >> DIGIT_BIT); \
         c1 = (pstm_digit) tt; c2 += (pstm_digit) (tt >> DIGIT_BIT); \
    } while (0);

/* Start a secondary sum: sc1:sc0 = i * j and sc2 = 0. */
# define SQRADDSC(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) (i)) * ((pstm_word) (j)); \
         sc0 = (pstm_digit) t; sc1 = (pstm_digit) (t >> DIGIT_BIT); sc2 = 0; \
    } while (0);

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) sc0) + ((pstm_word) (i)) * ((pstm_word) (j)); \
         sc0 = (pstm_digit) t; \
         t = ((pstm_word) sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit) t; \
         sc2 += (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);

/*
    Fold the doubled secondary sum into the column accumulator:
    c2:c1:c0 += 2 * sc2:sc1:sc0.
 */
# define SQRADDDB \
    do { pstm_word t; \
         t = ((pstm_word) sc0) + ((pstm_word) sc0) + ((pstm_word) c0); \
         c0 = (pstm_digit) t; \
         t = ((pstm_word) sc1) + ((pstm_word) sc1) + c1 + (t >> DIGIT_BIT); \
         c1 = (pstm_digit) t; \
         c2 = c2 + sc2 + sc2 + (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# endif /* ISO_C */ |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
/******************************************************************************/ |
|
428
|
|
|
|
|
|
|
/* |
|
429
|
|
|
|
|
|
|
Non-unrolled comba squarer |
|
430
|
|
|
|
|
|
|
*/ |
|
431
|
5430342
|
|
|
|
|
|
/**
    Generic (non-unrolled) Comba squaring: B = A * A.

    Output digit ix is the sum of all products A[tx] * A[ty] with
    tx + ty == ix.  Products with tx != ty occur twice in a square, so only
    half of them are visited and each is added twice (SQRADD2); for even
    columns the diagonal term A[ix / 2]^2 is added once (SQRADD).  The result
    is staged in a scratch buffer and copied into B only after every digit
    has been produced.

    @param pool   Pool handed to psMalloc()/psFree() when a temporary scratch
                  buffer has to be allocated.
    @param A      Value to square; A->used digits are read from A->dp.
    @param B      Receives the product.  Grown with pstm_grow() when B->alloc
                  is smaller than 2 * A->used.
    @param paD    Optional caller-supplied scratch buffer, may be NULL.
    @param paDlen Size of paD in bytes.  When paD is used, all paDlen bytes
                  are zeroed.  When it is smaller than
                  2 * A->used * sizeof(pstm_digit), paD is left untouched and
                  a temporary buffer is allocated instead.
    @return PS_SUCCESS on success, PS_MEM_FAIL when B cannot be grown or the
            temporary scratch buffer cannot be allocated.
 */
static int32_t pstm_sqr_comba_gen(psPool_t *pool, const pstm_int *A,
    pstm_int *B, pstm_digit *paD, psSize_t paDlen)
{
    int16 heapDst, pa;
    int32 ix, iz;
    pstm_digit c0, c1, c2, *dst;

# ifdef PSTM_ISO
    pstm_word tt;
# endif

    /* number of output digits to produce */
    pa = A->used + A->used;

    COMBA_START;
    CLEAR_CARRY;

    /* If B is not large enough, grow it and continue */
    if (B->alloc < pa)
    {
        if (pstm_grow(B, pa) != PSTM_OKAY)
        {
            return PS_MEM_FAIL;
        }
    }

    /*
        Pick the scratch buffer: the caller's paD when it is present and big
        enough, otherwise a temporary allocation that is released before
        returning.
     */
    heapDst = 0;
    dst = NULL;
    if ((paD != NULL) && !(paDlen < (sizeof(pstm_digit) * pa)))
    {
        dst = paD;
        memset(dst, 0x0, paDlen);
    }
    else if (pa != 0)
    {
        if ((dst = psMalloc(pool, sizeof(pstm_digit) * pa)) == NULL)
        {
            return PS_MEM_FAIL;
        }
        heapDst = 1;
        memset(dst, 0x0, sizeof(pstm_digit) * pa);
    }
    /*
        else: pa == 0 (A has no digits).  No scratch space is needed and dst
        is never dereferenced below, so do not request a zero-byte allocation:
        an allocator is allowed to answer that with NULL, which used to be
        misreported as PS_MEM_FAIL.
     */

    for (ix = 0; ix < pa; ix++)
    {
        int32 tx, ty, iy;
        pstm_digit *tmpy, *tmpx;

        /* get offsets into the two operands (both are A) */
        ty = min(A->used - 1, ix);
        tx = ix - ty;

        /* setup temp aliases */
        tmpx = A->dp + tx;
        tmpy = A->dp + ty;

        /*
            This is the number of times the loop would iterate for a plain
            multiply:
                while (tx++ < A->used && ty-- >= 0) { ... }
         */
        iy = min(A->used - tx, ty + 1);

        /*
            For squaring, tx never reaches ty inside the loop: the two
            indices approach each other at a rate of 2 per step, so halve the
            distance, rounding up so that odd distances are still executed.
         */
        iy = min(iy, (ty - tx + 1) >> 1);

        /* forward carries */
        CARRY_FORWARD;

        /* off-diagonal products, each counted twice */
        for (iz = 0; iz < iy; iz++)
        {
            SQRADD2(*tmpx++, *tmpy--);
        }

        /* even columns have the square term in them */
        if ((ix & 1) == 0)
        {
            SQRADD(A->dp[ix >> 1], A->dp[ix >> 1]);
        }

        /* store it */
        COMBA_STORE(dst[ix]);
    }

    COMBA_FINI;

    /* setup dest: remember how many digits B held before */
    iz = B->used;
    B->used = pa;
    {
        pstm_digit *tmpc;
        tmpc = B->dp;
        for (ix = 0; ix < pa; ix++)
        {
            *tmpc++ = dst[ix];
        }
        /* clear digits left over from the previous, longer value of B */
        for (; ix < iz; ix++)
        {
            *tmpc++ = 0;
        }
    }
    pstm_clamp(B);

    /* only release the scratch buffer if it was allocated here */
    if (heapDst)
    {
        psFree(dst, pool);
    }
    return PS_SUCCESS;
}
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
/******************************************************************************/ |
|
557
|
|
|
|
|
|
|
/* |
|
558
|
|
|
|
|
|
|
Unrolled Comba loop for 1024 bit keys |
|
559
|
|
|
|
|
|
|
*/ |
|
560
|
|
|
|
|
|
|
# ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS |
|
561
|
|
|
|
|
|
|
static int32_t pstm_sqr_comba16(const pstm_int *A, pstm_int *B) |
|
562
|
|
|
|
|
|
|
{ |
|
563
|
|
|
|
|
|
|
pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# ifdef PSTM_ISO |
|
566
|
|
|
|
|
|
|
pstm_word tt; |
|
567
|
|
|
|
|
|
|
# endif |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
if (B->alloc < 32) |
|
570
|
|
|
|
|
|
|
{ |
|
571
|
|
|
|
|
|
|
if (pstm_grow(B, 32) != PSTM_OKAY) |
|
572
|
|
|
|
|
|
|
{ |
|
573
|
|
|
|
|
|
|
return PS_MEM_FAIL; |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
a = A->dp; |
|
577
|
|
|
|
|
|
|
sc0 = sc1 = sc2 = 0; |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
COMBA_START; |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
/* clear carries */ |
|
582
|
|
|
|
|
|
|
CLEAR_CARRY; |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
/* output 0 */ |
|
585
|
|
|
|
|
|
|
SQRADD(a[0], a[0]); |
|
586
|
|
|
|
|
|
|
COMBA_STORE(b[0]); |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
/* output 1 */ |
|
589
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
590
|
|
|
|
|
|
|
SQRADD2(a[0], a[1]); |
|
591
|
|
|
|
|
|
|
COMBA_STORE(b[1]); |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
/* output 2 */ |
|
594
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
595
|
|
|
|
|
|
|
SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); |
|
596
|
|
|
|
|
|
|
COMBA_STORE(b[2]); |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
/* output 3 */ |
|
599
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
600
|
|
|
|
|
|
|
SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); |
|
601
|
|
|
|
|
|
|
COMBA_STORE(b[3]); |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
/* output 4 */ |
|
604
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
605
|
|
|
|
|
|
|
SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); |
|
606
|
|
|
|
|
|
|
COMBA_STORE(b[4]); |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
/* output 5 */ |
|
609
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
610
|
|
|
|
|
|
|
SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; |
|
611
|
|
|
|
|
|
|
COMBA_STORE(b[5]); |
|
612
|
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
/* output 6 */ |
|
614
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
615
|
|
|
|
|
|
|
SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); |
|
616
|
|
|
|
|
|
|
COMBA_STORE(b[6]); |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
/* output 7 */ |
|
619
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
620
|
|
|
|
|
|
|
SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; |
|
621
|
|
|
|
|
|
|
COMBA_STORE(b[7]); |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
/* output 8 */ |
|
624
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
625
|
|
|
|
|
|
|
SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); |
|
626
|
|
|
|
|
|
|
COMBA_STORE(b[8]); |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
/* output 9 */ |
|
629
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
630
|
|
|
|
|
|
|
SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; |
|
631
|
|
|
|
|
|
|
COMBA_STORE(b[9]); |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
/* output 10 */ |
|
634
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
635
|
|
|
|
|
|
|
SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); |
|
636
|
|
|
|
|
|
|
COMBA_STORE(b[10]); |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
/* output 11 */ |
|
639
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
640
|
|
|
|
|
|
|
SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; |
|
641
|
|
|
|
|
|
|
COMBA_STORE(b[11]); |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
/* output 12 */ |
|
644
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
645
|
|
|
|
|
|
|
SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); |
|
646
|
|
|
|
|
|
|
COMBA_STORE(b[12]); |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
/* output 13 */ |
|
649
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
650
|
|
|
|
|
|
|
SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; |
|
651
|
|
|
|
|
|
|
COMBA_STORE(b[13]); |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
/* output 14 */ |
|
654
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
655
|
|
|
|
|
|
|
SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); |
|
656
|
|
|
|
|
|
|
COMBA_STORE(b[14]); |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
/* output 15 */ |
|
659
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
660
|
|
|
|
|
|
|
SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; |
|
661
|
|
|
|
|
|
|
COMBA_STORE(b[15]); |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
/* output 16 */ |
|
664
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
665
|
|
|
|
|
|
|
SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); |
|
666
|
|
|
|
|
|
|
COMBA_STORE(b[16]); |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
/* output 17 */ |
|
669
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
670
|
|
|
|
|
|
|
SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; |
|
671
|
|
|
|
|
|
|
COMBA_STORE(b[17]); |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
/* output 18 */ |
|
674
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
675
|
|
|
|
|
|
|
SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); |
|
676
|
|
|
|
|
|
|
COMBA_STORE(b[18]); |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
/* output 19 */ |
|
679
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
680
|
|
|
|
|
|
|
SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; |
|
681
|
|
|
|
|
|
|
COMBA_STORE(b[19]); |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
/* output 20 */ |
|
684
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
685
|
|
|
|
|
|
|
SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); |
|
686
|
|
|
|
|
|
|
COMBA_STORE(b[20]); |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
/* output 21 */ |
|
689
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
690
|
|
|
|
|
|
|
SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; |
|
691
|
|
|
|
|
|
|
COMBA_STORE(b[21]); |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
/* output 22 */ |
|
694
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
695
|
|
|
|
|
|
|
SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); |
|
696
|
|
|
|
|
|
|
COMBA_STORE(b[22]); |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
/* output 23 */ |
|
699
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
700
|
|
|
|
|
|
|
SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; |
|
701
|
|
|
|
|
|
|
COMBA_STORE(b[23]); |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
/* output 24 */ |
|
704
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
705
|
|
|
|
|
|
|
SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); |
|
706
|
|
|
|
|
|
|
COMBA_STORE(b[24]); |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
/* output 25 */ |
|
709
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
710
|
|
|
|
|
|
|
SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; |
|
711
|
|
|
|
|
|
|
COMBA_STORE(b[25]); |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
/* output 26 */ |
|
714
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
715
|
|
|
|
|
|
|
SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); |
|
716
|
|
|
|
|
|
|
COMBA_STORE(b[26]); |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
/* output 27 */ |
|
719
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
720
|
|
|
|
|
|
|
SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); |
|
721
|
|
|
|
|
|
|
COMBA_STORE(b[27]); |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
/* output 28 */ |
|
724
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
725
|
|
|
|
|
|
|
SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); |
|
726
|
|
|
|
|
|
|
COMBA_STORE(b[28]); |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
/* output 29 */ |
|
729
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
730
|
|
|
|
|
|
|
SQRADD2(a[14], a[15]); |
|
731
|
|
|
|
|
|
|
COMBA_STORE(b[29]); |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
/* output 30 */ |
|
734
|
|
|
|
|
|
|
CARRY_FORWARD; |
|
735
|
|
|
|
|
|
|
SQRADD(a[15], a[15]); |
|
736
|
|
|
|
|
|
|
COMBA_STORE(b[30]); |
|
737
|
|
|
|
|
|
|
COMBA_STORE2(b[31]); |
|
738
|
|
|
|
|
|
|
COMBA_FINI; |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
B->used = 32; |
|
741
|
|
|
|
|
|
|
B->sign = PSTM_ZPOS; |
|
742
|
|
|
|
|
|
|
memcpy(B->dp, b, 32 * sizeof(pstm_digit)); |
|
743
|
|
|
|
|
|
|
pstm_clamp(B); |
|
744
|
|
|
|
|
|
|
return PSTM_OKAY; |
|
745
|
|
|
|
|
|
|
} |
|
746
|
|
|
|
|
|
|
# endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */ |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
|
750
|
|
|
|
|
|
|
/*
    Fixed-size squaring for operands of exactly 32 digits.

    Reads A->dp[0..31] and produces all 64 result digits with a fully
    unrolled, column-by-column Comba schedule.  Column k collects every
    cross product a[i] * a[k - i] with i < k - i, plus a[k/2] squared
    when k is even.  The digits are assembled in a local buffer and copied
    into B only once every column is done.

    Returns PSTM_OKAY on success, or PS_MEM_FAIL if B cannot be grown to
    hold 64 digits.

    NOTE(review): the SQRADD* / COMBA_* / CARRY_* macros are defined outside
    this function and operate on the locals c0, c1, c2, sc0, sc1, sc2 (and tt
    under PSTM_ISO) by name; those names must not be changed here.
 */
static int32_t pstm_sqr_comba32(const pstm_int *A, pstm_int *B)
{
    pstm_digit *a;
    pstm_digit b[64];
    pstm_digit c0, c1, c2;
    pstm_digit sc0, sc1, sc2;

# ifdef PSTM_ISO
    pstm_word tt;
# endif

    /* Make room for the full 64-digit result before touching anything. */
    if (B->alloc < 64 && pstm_grow(B, 64) != PSTM_OKAY)
    {
        return PS_MEM_FAIL;
    }

    sc0 = 0;
    sc1 = 0;
    sc2 = 0;
    /* Fetched only after the grow above; presumably matters when A and B
       are the same object and pstm_grow moves the digits - TODO confirm. */
    a = A->dp;
    COMBA_START;

    /* start with an empty carry chain */
    CLEAR_CARRY;

    /* column 0 */
    SQRADD(a[0], a[0]);
    COMBA_STORE(b[0]);

    /* column 1 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[1]);
    COMBA_STORE(b[1]);

    /* column 2 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[2]);
    SQRADD(a[1], a[1]);
    COMBA_STORE(b[2]);

    /* column 3 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[3]);
    SQRADD2(a[1], a[2]);
    COMBA_STORE(b[3]);

    /* column 4 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[4]);
    SQRADD2(a[1], a[3]);
    SQRADD(a[2], a[2]);
    COMBA_STORE(b[4]);

    /* column 5 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]);
    SQRADDDB;
    COMBA_STORE(b[5]);

    /* column 6 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]);
    SQRADDDB; SQRADD(a[3], a[3]);
    COMBA_STORE(b[6]);

    /* column 7 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]);
    SQRADDDB;
    COMBA_STORE(b[7]);

    /* column 8 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]);
    SQRADDDB; SQRADD(a[4], a[4]);
    COMBA_STORE(b[8]);

    /* column 9 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]);
    SQRADDAC(a[4], a[5]);
    SQRADDDB;
    COMBA_STORE(b[9]);

    /* column 10 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]);
    SQRADDAC(a[4], a[6]);
    SQRADDDB; SQRADD(a[5], a[5]);
    COMBA_STORE(b[10]);

    /* column 11 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]);
    SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]);
    SQRADDDB;
    COMBA_STORE(b[11]);

    /* column 12 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]);
    SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]);
    SQRADDDB; SQRADD(a[6], a[6]);
    COMBA_STORE(b[12]);

    /* column 13 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]);
    SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]);
    SQRADDDB;
    COMBA_STORE(b[13]);

    /* column 14 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]);
    SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]);
    SQRADDDB; SQRADD(a[7], a[7]);
    COMBA_STORE(b[14]);

    /* column 15 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]);
    SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]);
    SQRADDDB;
    COMBA_STORE(b[15]);

    /* column 16 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]);
    SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]);
    SQRADDDB; SQRADD(a[8], a[8]);
    COMBA_STORE(b[16]);

    /* column 17 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]);
    SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]);
    SQRADDAC(a[8], a[9]);
    SQRADDDB;
    COMBA_STORE(b[17]);

    /* column 18 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]);
    SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]);
    SQRADDAC(a[8], a[10]);
    SQRADDDB; SQRADD(a[9], a[9]);
    COMBA_STORE(b[18]);

    /* column 19 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]);
    SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]);
    SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]);
    SQRADDDB;
    COMBA_STORE(b[19]);

    /* column 20 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]);
    SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]);
    SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]);
    SQRADDDB; SQRADD(a[10], a[10]);
    COMBA_STORE(b[20]);

    /* column 21 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]);
    SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]);
    SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]);
    SQRADDDB;
    COMBA_STORE(b[21]);

    /* column 22 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]);
    SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]);
    SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]);
    SQRADDDB; SQRADD(a[11], a[11]);
    COMBA_STORE(b[22]);

    /* column 23 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]);
    SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]);
    SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]);
    SQRADDDB;
    COMBA_STORE(b[23]);

    /* column 24 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]);
    SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]);
    SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]);
    SQRADDDB; SQRADD(a[12], a[12]);
    COMBA_STORE(b[24]);

    /* column 25 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]);
    SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]);
    SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]);
    SQRADDAC(a[12], a[13]);
    SQRADDDB;
    COMBA_STORE(b[25]);

    /* column 26 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]);
    SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]);
    SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]);
    SQRADDAC(a[12], a[14]);
    SQRADDDB; SQRADD(a[13], a[13]);
    COMBA_STORE(b[26]);

    /* column 27 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]);
    SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]);
    SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]);
    SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]);
    SQRADDDB;
    COMBA_STORE(b[27]);

    /* column 28 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]);
    SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]);
    SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]);
    SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]);
    SQRADDDB; SQRADD(a[14], a[14]);
    COMBA_STORE(b[28]);

    /* column 29 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]);
    SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]);
    SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]);
    SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]);
    SQRADDDB;
    COMBA_STORE(b[29]);

    /* column 30 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]);
    SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]);
    SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]);
    SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]);
    SQRADDDB; SQRADD(a[15], a[15]);
    COMBA_STORE(b[30]);

    /* column 31 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]);
    SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]);
    SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]);
    SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]);
    SQRADDDB;
    COMBA_STORE(b[31]);

    /* column 32 */
    CARRY_FORWARD;
    SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]);
    SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]);
    SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]);
    SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]);
    SQRADDDB; SQRADD(a[16], a[16]);
    COMBA_STORE(b[32]);

    /* column 33 */
    CARRY_FORWARD;
    SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]);
    SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]);
    SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]);
    SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]);
    SQRADDDB;
    COMBA_STORE(b[33]);

    /* column 34 */
    CARRY_FORWARD;
    SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]);
    SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]);
    SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]);
    SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]);
    SQRADDDB; SQRADD(a[17], a[17]);
    COMBA_STORE(b[34]);

    /* column 35 */
    CARRY_FORWARD;
    SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]);
    SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]);
    SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]);
    SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]);
    SQRADDDB;
    COMBA_STORE(b[35]);

    /* column 36 */
    CARRY_FORWARD;
    SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]);
    SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]);
    SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]);
    SQRADDAC(a[17], a[19]);
    SQRADDDB; SQRADD(a[18], a[18]);
    COMBA_STORE(b[36]);

    /* column 37 */
    CARRY_FORWARD;
    SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]);
    SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]);
    SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]);
    SQRADDAC(a[18], a[19]);
    SQRADDDB;
    COMBA_STORE(b[37]);

    /* column 38 */
    CARRY_FORWARD;
    SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]);
    SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]);
    SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]);
    SQRADDDB; SQRADD(a[19], a[19]);
    COMBA_STORE(b[38]);

    /* column 39 */
    CARRY_FORWARD;
    SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]);
    SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]);
    SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]);
    SQRADDDB;
    COMBA_STORE(b[39]);

    /* column 40 */
    CARRY_FORWARD;
    SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]);
    SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]);
    SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]);
    SQRADDDB; SQRADD(a[20], a[20]);
    COMBA_STORE(b[40]);

    /* column 41 */
    CARRY_FORWARD;
    SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]);
    SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]);
    SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]);
    SQRADDDB;
    COMBA_STORE(b[41]);

    /* column 42 */
    CARRY_FORWARD;
    SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]);
    SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]);
    SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]);
    SQRADDDB; SQRADD(a[21], a[21]);
    COMBA_STORE(b[42]);

    /* column 43 */
    CARRY_FORWARD;
    SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]);
    SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]);
    SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]);
    SQRADDDB;
    COMBA_STORE(b[43]);

    /* column 44 */
    CARRY_FORWARD;
    SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]);
    SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]);
    SQRADDAC(a[21], a[23]);
    SQRADDDB; SQRADD(a[22], a[22]);
    COMBA_STORE(b[44]);

    /* column 45 */
    CARRY_FORWARD;
    SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]);
    SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]);
    SQRADDAC(a[22], a[23]);
    SQRADDDB;
    COMBA_STORE(b[45]);

    /* column 46 */
    CARRY_FORWARD;
    SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]);
    SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]);
    SQRADDDB; SQRADD(a[23], a[23]);
    COMBA_STORE(b[46]);

    /* column 47 */
    CARRY_FORWARD;
    SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]);
    SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]);
    SQRADDDB;
    COMBA_STORE(b[47]);

    /* column 48 */
    CARRY_FORWARD;
    SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]);
    SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]);
    SQRADDDB; SQRADD(a[24], a[24]);
    COMBA_STORE(b[48]);

    /* column 49 */
    CARRY_FORWARD;
    SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]);
    SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]);
    SQRADDDB;
    COMBA_STORE(b[49]);

    /* column 50 */
    CARRY_FORWARD;
    SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]);
    SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]);
    SQRADDDB; SQRADD(a[25], a[25]);
    COMBA_STORE(b[50]);

    /* column 51 */
    CARRY_FORWARD;
    SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]);
    SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]);
    SQRADDDB;
    COMBA_STORE(b[51]);

    /* column 52 */
    CARRY_FORWARD;
    SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]);
    SQRADDAC(a[25], a[27]);
    SQRADDDB; SQRADD(a[26], a[26]);
    COMBA_STORE(b[52]);

    /* column 53 */
    CARRY_FORWARD;
    SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]);
    SQRADDAC(a[26], a[27]);
    SQRADDDB;
    COMBA_STORE(b[53]);

    /* column 54 */
    CARRY_FORWARD;
    SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]);
    SQRADDDB; SQRADD(a[27], a[27]);
    COMBA_STORE(b[54]);

    /* column 55 */
    CARRY_FORWARD;
    SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]);
    SQRADDDB;
    COMBA_STORE(b[55]);

    /* column 56 */
    CARRY_FORWARD;
    SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]);
    SQRADDDB; SQRADD(a[28], a[28]);
    COMBA_STORE(b[56]);

    /* column 57 */
    CARRY_FORWARD;
    SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]);
    SQRADDDB;
    COMBA_STORE(b[57]);

    /* column 58 */
    CARRY_FORWARD;
    SQRADD2(a[27], a[31]);
    SQRADD2(a[28], a[30]);
    SQRADD(a[29], a[29]);
    COMBA_STORE(b[58]);

    /* column 59 */
    CARRY_FORWARD;
    SQRADD2(a[28], a[31]);
    SQRADD2(a[29], a[30]);
    COMBA_STORE(b[59]);

    /* column 60 */
    CARRY_FORWARD;
    SQRADD2(a[29], a[31]);
    SQRADD(a[30], a[30]);
    COMBA_STORE(b[60]);

    /* column 61 */
    CARRY_FORWARD;
    SQRADD2(a[30], a[31]);
    COMBA_STORE(b[61]);

    /* column 62, then the final carry digit goes into b[63] */
    CARRY_FORWARD;
    SQRADD(a[31], a[31]);
    COMBA_STORE(b[62]);
    COMBA_STORE2(b[63]);
    COMBA_FINI;

    /* Publish the 64 digits into B, then let pstm_clamp adjust B. */
    memcpy(B->dp, b, sizeof(b));
    B->sign = PSTM_ZPOS;
    B->used = 64;
    pstm_clamp(B);
    return PSTM_OKAY;
}
|
1094
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
/******************************************************************************/ |
|
1097
|
|
|
|
|
|
|
/** |
|
1098
|
|
|
|
|
|
|
B = A**2. |
|
1099
|
|
|
|
|
|
|
@param[in] pool Memory pool |
|
1100
|
|
|
|
|
|
|
@param[in] A Base |
|
1101
|
|
|
|
|
|
|
@param[out] B Result |
|
1102
|
|
|
|
|
|
|
@param[in,out] paD Temporary storage |
|
1103
|
|
|
|
|
|
|
@param[in] paDlen Number of items pointed to by paD |
|
1104
|
|
|
|
|
|
|
*/ |
|
1105
|
5430342
|
|
|
|
|
|
int32_t pstm_sqr_comba(psPool_t *pool, const pstm_int *A, pstm_int *B, |
|
1106
|
|
|
|
|
|
|
pstm_digit *paD, psSize_t paDlen) |
|
1107
|
|
|
|
|
|
|
{ |
|
1108
|
|
|
|
|
|
|
# ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS |
|
1109
|
|
|
|
|
|
|
if (A->used == 16) |
|
1110
|
|
|
|
|
|
|
{ |
|
1111
|
|
|
|
|
|
|
return pstm_sqr_comba16(A, B); |
|
1112
|
|
|
|
|
|
|
} |
|
1113
|
|
|
|
|
|
|
else |
|
1114
|
|
|
|
|
|
|
{ |
|
1115
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
|
1116
|
|
|
|
|
|
|
if (A->used == 32) |
|
1117
|
|
|
|
|
|
|
{ |
|
1118
|
|
|
|
|
|
|
return pstm_sqr_comba32(A, B); |
|
1119
|
|
|
|
|
|
|
} |
|
1120
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
|
1121
|
|
|
|
|
|
|
return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); |
|
1122
|
|
|
|
|
|
|
} |
|
1123
|
|
|
|
|
|
|
# else |
|
1124
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
|
1125
|
|
|
|
|
|
|
if (A->used == 32) |
|
1126
|
|
|
|
|
|
|
{ |
|
1127
|
|
|
|
|
|
|
return pstm_sqr_comba32(A, B); |
|
1128
|
|
|
|
|
|
|
} |
|
1129
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
|
1130
|
5430342
|
|
|
|
|
|
return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); |
|
1131
|
|
|
|
|
|
|
# endif |
|
1132
|
|
|
|
|
|
|
} |
|
1133
|
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
#endif /* defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) */ |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
/******************************************************************************/ |
|
1137
|
|
|
|
|
|
|
|