| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/** |
|
2
|
|
|
|
|
|
|
* @file pstm_montgomery_reduce.c |
|
3
|
|
|
|
|
|
|
* @version 950bba4 (HEAD -> master) |
|
4
|
|
|
|
|
|
|
* |
|
5
|
|
|
|
|
|
|
* Multiprecision Montgomery Reduction. |
|
6
|
|
|
|
|
|
|
*/ |
|
7
|
|
|
|
|
|
|
/* |
|
8
|
|
|
|
|
|
|
* Copyright (c) 2013-2017 INSIDE Secure Corporation |
|
9
|
|
|
|
|
|
|
* Copyright (c) PeerSec Networks, 2002-2011 |
|
10
|
|
|
|
|
|
|
* All Rights Reserved |
|
11
|
|
|
|
|
|
|
* |
|
12
|
|
|
|
|
|
|
* The latest version of this code is available at http://www.matrixssl.org |
|
13
|
|
|
|
|
|
|
* |
|
14
|
|
|
|
|
|
|
* This software is open source; you can redistribute it and/or modify |
|
15
|
|
|
|
|
|
|
* it under the terms of the GNU General Public License as published by |
|
16
|
|
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
17
|
|
|
|
|
|
|
* (at your option) any later version. |
|
18
|
|
|
|
|
|
|
* |
|
19
|
|
|
|
|
|
|
* This General Public License does NOT permit incorporating this software |
|
20
|
|
|
|
|
|
|
* into proprietary programs. If you are unable to comply with the GPL, a |
|
21
|
|
|
|
|
|
|
* commercial license for this software may be purchased from INSIDE at |
|
22
|
|
|
|
|
|
|
* http://www.insidesecure.com/ |
|
23
|
|
|
|
|
|
|
* |
|
24
|
|
|
|
|
|
|
* This program is distributed in WITHOUT ANY WARRANTY; without even the |
|
25
|
|
|
|
|
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
26
|
|
|
|
|
|
|
* See the GNU General Public License for more details. |
|
27
|
|
|
|
|
|
|
* |
|
28
|
|
|
|
|
|
|
* You should have received a copy of the GNU General Public License |
|
29
|
|
|
|
|
|
|
* along with this program; if not, write to the Free Software |
|
30
|
|
|
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
31
|
|
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
|
32
|
|
|
|
|
|
|
*/ |
|
33
|
|
|
|
|
|
|
/******************************************************************************/ |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#include "../cryptoImpl.h" |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#if defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) || defined(USE_MATRIX_DH) |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
/******************************************************************************/ |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# if defined(PSTM_X86) |
|
42
|
|
|
|
|
|
|
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ |
|
43
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT) |
|
44
|
|
|
|
|
|
|
# error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" |
|
45
|
|
|
|
|
|
|
# endif |
|
46
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit x86 Assembly Optimizations") */ |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
/* The x86-32 variant needs no setup/teardown around the reduction or per round. */
# define MONT_START
# define MONT_FINI
# define LOOP_END
/* Per-round multiplier, evaluated in pstm_digit arithmetic at the expansion site. */
# define LOOP_START \
    mu = c[x] * mp

/*
    One multiply-accumulate step on 32-bit digits:
        edx:eax = *tmpm * mu
        edx:eax += cy
        _c[LO]  += eax          (carry folded into edx)
        cy       = edx
    tmpm is post-incremented as part of the operand expression, so every
    expansion consumes exactly one modulus digit.
 */
# define INNERMUL \
    asm ( \
    "movl %5,%%eax \n\t" \
    "mull %4 \n\t" \
    "addl %1,%%eax \n\t" \
    "adcl $0,%%edx \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl $0,%%edx \n\t" \
    "movl %%edx,%1 \n\t" \
    : "=g" (_c[LO]), "=r" (cy) \
    : "0" (_c[LO]), "1" (cy), "g" (mu), "g" (*tmpm++) \
    : "%eax", "%edx", "cc")

/*
    Carry ripple: _c[LO] += cy, then cy becomes the carry flag of that
    addition (0 or 1), captured via setb/movzbl through %al.
 */
# define PROPCARRY \
    asm ( \
    "addl %1,%0 \n\t" \
    "setb %%al \n\t" \
    "movzbl %%al,%1 \n\t" \
    : "=g" (_c[LO]), "=r" (cy) \
    : "0" (_c[LO]), "1" (cy) \
    : "%eax", "cc")
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
/******************************************************************************/ |
|
77
|
|
|
|
|
|
|
# elif defined(PSTM_X86_64) |
|
78
|
|
|
|
|
|
|
/* x86-64 optimized */ |
|
79
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) |
|
80
|
|
|
|
|
|
|
# error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" |
|
81
|
|
|
|
|
|
|
# endif |
|
82
|
|
|
|
|
|
|
/* #pragma message ("Using 64 bit x86_64 Assembly Optimizations") */ |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
/* The x86-64 variant needs no setup/teardown around the reduction or per round. */
# define MONT_START
# define MONT_FINI
# define LOOP_END
/* Per-round multiplier, evaluated in pstm_digit arithmetic at the expansion site. */
# define LOOP_START \
    mu = c[x] * mp

/*
    One multiply-accumulate step on 64-bit digits:
        rdx:rax = *tmpm * mu
        rdx:rax += cy
        _c[LO]  += rax          (carry folded into rdx)
        cy       = rdx
    tmpm is post-incremented as part of the operand expression, so every
    expansion consumes exactly one modulus digit.
 */
# define INNERMUL \
    asm ( \
    "movq %5,%%rax \n\t" \
    "mulq %4 \n\t" \
    "addq %1,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rdx,%1 \n\t" \
    : "=g" (_c[LO]), "=r" (cy) \
    : "0" (_c[LO]), "1" (cy), "r" (mu), "r" (*tmpm++) \
    : "%rax", "%rdx", "cc")
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
/*
    Eight unrolled multiply-accumulate steps on 64-bit digits. For each
    i in 0..7:
        rdx:rax = tmpm[i] * mu
        rdx:rax += _c[i]
        rdx:rax += cy
        _c[i]    = rax
        cy       = rdx
    The next tmpm digit (r11) and the next _c digit (r10) are loaded ahead
    of the multiply that precedes their use. Neither _c nor tmpm is advanced
    here; the expansion site must step both by 8 afterwards.

    The digits of _c[] and tmpm[] are reached only through the pointer
    operands, so the compiler cannot see those loads and stores from the
    operand list. The "memory" clobber tells it that the asm touches memory,
    which keeps it from caching or reordering accesses to c[] across this
    statement.
 */
# define INNERMUL8 \
    asm ( \
    "movq 0(%5),%%rax \n\t" \
    "movq 0(%2),%%r10 \n\t" \
    "movq 0x8(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x8(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x10(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x10(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x8(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x18(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x18(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x10(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x20(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x20(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x18(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x28(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x28(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x20(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x30(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x30(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x28(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "movq 0x38(%5),%%r11 \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq 0x38(%2),%%r10 \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x30(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    "movq %%r11,%%rax \n\t" \
    "mulq %4 \n\t" \
    "addq %%r10,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "addq %3,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rax,0x38(%0) \n\t" \
    "movq %%rdx,%1 \n\t" \
    \
    : "=r" (_c), "=r" (cy) \
    : "0" (_c), "1" (cy), "g" (mu), "r" (tmpm) \
    : "%rax", "%rdx", "%r10", "%r11", "cc", "memory")
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
/*
    Carry ripple: _c[LO] += cy, then cy becomes the carry flag of that
    addition (0 or 1), captured via setb/movzbq through %al.
 */
# define PROPCARRY \
    asm ( \
    "addq %1,%0 \n\t" \
    "setb %%al \n\t" \
    "movzbq %%al,%1 \n\t" \
    : "=g" (_c[LO]), "=r" (cy) \
    : "0" (_c[LO]), "1" (cy) \
    : "%rax", "cc")
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
/******************************************************************************/ |
|
206
|
|
|
|
|
|
|
# elif defined(PSTM_ARM) |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
/* The ARM variants need no setup/teardown around the reduction or per round. */
# define MONT_START
# define MONT_FINI
# define LOOP_END
/* Per-round multiplier, evaluated in pstm_digit arithmetic at the expansion site. */
# define LOOP_START \
    mu = c[x] * mp

/*
    Both instruction-set variants below implement the same two steps and
    differ only in the ITE block that Thumb-2 places before the conditional
    moves.

    INNERMUL:  r0 = _c[0] + cy; cy = carry of that add (1 or 0);
               {cy:r0} += mu * *tmpm (UMLAL); _c[0] = r0.
               tmpm is post-incremented in the operand expression.
    PROPCARRY: _c[0] += cy; cy = carry of that add (1 or 0).

    Each macro body already ends in ';', so "INNERMUL;" at the expansion
    site yields an extra empty statement.
 */
# ifdef __thumb2__
/* #pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations") */
# define INNERMUL \
    asm ( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " ITE CS \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    " UMLAL r0,%0,%3,%4 \n\t" \
    " STR r0,%1 \n\t" \
    : "=r" (cy), "=m" (_c[0]) \
    : "0" (cy), "r" (mu), "r" (*tmpm++), "m" (_c[0]) \
    : "r0", "cc");
# define PROPCARRY \
    asm ( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " STR r0,%1 \n\t" \
    " ITE CS \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    : "=r" (cy), "=m" (_c[0]) \
    : "0" (cy), "m" (_c[0]) \
    : "r0", "cc");
# else /* Non-Thumb2 code */
/* #pragma message ("Using 32 bit ARM Assembly Optimizations") */
# define INNERMUL \
    asm ( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    " UMLAL r0,%0,%3,%4 \n\t" \
    " STR r0,%1 \n\t" \
    : "=r" (cy), "=m" (_c[0]) \
    : "0" (cy), "r" (mu), "r" (*tmpm++), "m" (_c[0]) \
    : "r0", "cc");
# define PROPCARRY \
    asm ( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " STR r0,%1 \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    : "=r" (cy), "=m" (_c[0]) \
    : "0" (cy), "m" (_c[0]) \
    : "r0", "cc");
# endif /* __thumb2__ */
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
/******************************************************************************/ |
|
265
|
|
|
|
|
|
|
# elif defined(PSTM_MIPS) |
|
266
|
|
|
|
|
|
|
/* MIPS32 */ |
|
267
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit MIPS Assembly Optimizations") */ |
|
268
|
|
|
|
|
|
|
/* The MIPS32 variant needs no setup/teardown around the reduction or per round. */
# define MONT_START
# define MONT_FINI
# define LOOP_END
/* Per-round multiplier, evaluated in pstm_digit arithmetic at the expansion site. */
# define LOOP_START \
    mu = c[x] * mp

/*
    One multiply-accumulate step on 32-bit digits:
        $13:$12 = mu * tmpm[0]
        $13:$12 += cy           (carry detected with sltu)
        $13:$12 += _c[0]
        _c[0]    = $12
        cy       = $13
    followed by ++tmpm.

    The template reads %0 (cy) and %1 (_c[0]) before it writes them, so both
    must be read-write operands: cy is tied to operand 0 with the "0"
    matching constraint and _c[0] is declared "+m". Operand numbers %3 and
    %4 are unchanged. multu overwrites HI/LO, hence the extra clobbers.
 */
# define INNERMUL \
    asm ( \
    " multu %3,%4 \n\t" \
    " mflo $12 \n\t" \
    " mfhi $13 \n\t" \
    " addu $12,$12,%0 \n\t" \
    " sltu $10,$12,%0 \n\t" \
    " addu $13,$13,$10 \n\t" \
    " lw $10,%1 \n\t" \
    " addu $12,$12,$10 \n\t" \
    " sltu $10,$12,$10 \n\t" \
    " addu %0,$13,$10 \n\t" \
    " sw $12,%1 \n\t" \
    : "=r" (cy), "+m" (_c[0]) \
    : "0" (cy), "r" (mu), "r" (tmpm[0]) \
    : "$10", "$12", "$13", "hi", "lo") \
    ; ++tmpm;

/*
    Carry ripple: _c[0] += cy, then cy = (sum < old cy), i.e. the carry out.
    As above, cy and _c[0] are read before being written, so they are
    declared as tied / read-write operands.
 */
# define PROPCARRY \
    asm ( \
    " lw $10,%1 \n\t" \
    " addu $10,$10,%0 \n\t" \
    " sw $10,%1 \n\t" \
    " sltu %0,$10,%0 \n\t" \
    : "=r" (cy), "+m" (_c[0]) \
    : "0" (cy) \
    : "$10");
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
/******************************************************************************/ |
|
303
|
|
|
|
|
|
|
# else |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
/* ISO C code */ |
|
306
|
|
|
|
|
|
|
/* Portable fallback: no setup/teardown hooks are needed. */
# define MONT_START
# define MONT_FINI
# define LOOP_END
/* Per-round multiplier, evaluated in pstm_digit arithmetic at the expansion site. */
# define LOOP_START \
    mu = mp * c[x]

/*
    One multiply-accumulate step in double-width (pstm_word) arithmetic:
        acc   = mu * *tmpm + _c[0] + cy
        _c[0] = low digit of acc
        cy    = acc >> DIGIT_BIT
    tmpm is post-incremented, so every expansion consumes one modulus digit.
 */
# define INNERMUL \
    do { \
        pstm_word acc = (pstm_word) mu; \
        acc *= (pstm_word) *tmpm++; \
        acc += (pstm_word) _c[0] + (pstm_word) cy; \
        _c[0] = (pstm_digit) acc; \
        cy = (pstm_digit) (acc >> DIGIT_BIT); \
    } while (0)

/* Carry ripple: _c[0] += cy, then cy = 1 if that addition wrapped, else 0. */
# define PROPCARRY \
    do { \
        pstm_digit sum = (pstm_digit) (_c[0] + cy); \
        _c[0] = sum; \
        cy = (sum < cy); \
    } while (0)
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# endif |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
/******************************************************************************/ |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# define LO 0 |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
/** |
|
330
|
|
|
|
|
|
|
computes x/R == x (mod N) via Montgomery Reduction. |
|
331
|
|
|
|
|
|
|
*/ |
|
332
|
12819826
|
|
|
|
|
|
int32_t pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, const pstm_int *m, |
|
333
|
|
|
|
|
|
|
pstm_digit mp, pstm_digit *paD, psSize_t paDlen) |
|
334
|
|
|
|
|
|
|
{ |
|
335
|
|
|
|
|
|
|
pstm_digit *c, *_c, *tmpm, mu; |
|
336
|
|
|
|
|
|
|
int32 oldused, x, y; |
|
337
|
|
|
|
|
|
|
int16 pa; |
|
338
|
|
|
|
|
|
|
uint32 cSize; |
|
339
|
|
|
|
|
|
|
|
|
340
|
12819826
|
|
|
|
|
|
pa = m->used; |
|
341
|
12819826
|
50
|
|
|
|
|
if (pa > a->alloc) |
|
342
|
|
|
|
|
|
|
{ |
|
343
|
|
|
|
|
|
|
/* Sanity test for bad numbers. This will confirm no buffer overruns */ |
|
344
|
0
|
|
|
|
|
|
return PS_LIMIT_FAIL; |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
|
|
347
|
12819826
|
|
|
|
|
|
cSize = (2 * pa + 1) * sizeof(pstm_digit); |
|
348
|
12819826
|
50
|
|
|
|
|
if (paD && paDlen >= cSize) |
|
|
|
50
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
{ |
|
350
|
12819826
|
|
|
|
|
|
c = paD; |
|
351
|
12819826
|
|
|
|
|
|
memset(c, 0x0, paDlen); |
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
else |
|
354
|
|
|
|
|
|
|
{ |
|
355
|
0
|
|
|
|
|
|
c = psMalloc(pool, cSize); |
|
356
|
0
|
0
|
|
|
|
|
if (c == NULL) |
|
357
|
|
|
|
|
|
|
{ |
|
358
|
0
|
|
|
|
|
|
return PS_MEM_FAIL; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
else |
|
361
|
|
|
|
|
|
|
{ |
|
362
|
0
|
|
|
|
|
|
memset(c, 0x0, cSize); |
|
363
|
|
|
|
|
|
|
} |
|
364
|
|
|
|
|
|
|
} |
|
365
|
|
|
|
|
|
|
/* copy the input */ |
|
366
|
12819826
|
|
|
|
|
|
oldused = a->used; |
|
367
|
229665027
|
100
|
|
|
|
|
for (x = 0; x < oldused; x++) |
|
368
|
|
|
|
|
|
|
{ |
|
369
|
216845201
|
|
|
|
|
|
c[x] = a->dp[x]; |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
MONT_START; |
|
373
|
|
|
|
|
|
|
|
|
374
|
128168710
|
100
|
|
|
|
|
for (x = 0; x < pa; x++) |
|
375
|
|
|
|
|
|
|
{ |
|
376
|
115348884
|
|
|
|
|
|
pstm_digit cy = 0; |
|
377
|
|
|
|
|
|
|
/* get Mu for this round */ |
|
378
|
115348884
|
|
|
|
|
|
LOOP_START; |
|
379
|
115348884
|
|
|
|
|
|
_c = c + x; |
|
380
|
115348884
|
|
|
|
|
|
tmpm = m->dp; |
|
381
|
115348884
|
|
|
|
|
|
y = 0; |
|
382
|
|
|
|
|
|
|
# ifdef PSTM_X86_64 |
|
383
|
230674128
|
100
|
|
|
|
|
for (; y < (pa & ~7); y += 8) |
|
384
|
|
|
|
|
|
|
{ |
|
385
|
115325244
|
|
|
|
|
|
INNERMUL8; |
|
386
|
115325244
|
|
|
|
|
|
_c += 8; |
|
387
|
115325244
|
|
|
|
|
|
tmpm += 8; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
# endif /* PSTM_X86_64 */ |
|
390
|
230768688
|
100
|
|
|
|
|
for (; y < pa; y++) |
|
391
|
|
|
|
|
|
|
{ |
|
392
|
115419804
|
|
|
|
|
|
INNERMUL; |
|
393
|
115419804
|
|
|
|
|
|
++_c; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
|
|
|
|
|
|
LOOP_END; |
|
396
|
230244105
|
100
|
|
|
|
|
while (cy) |
|
397
|
|
|
|
|
|
|
{ |
|
398
|
114895221
|
|
|
|
|
|
PROPCARRY; |
|
399
|
114895221
|
|
|
|
|
|
++_c; |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
} |
|
402
|
|
|
|
|
|
|
/* now copy out */ |
|
403
|
12819826
|
|
|
|
|
|
_c = c + pa; |
|
404
|
12819826
|
|
|
|
|
|
tmpm = a->dp; |
|
405
|
140988536
|
100
|
|
|
|
|
for (x = 0; x < pa + 1; x++) |
|
406
|
|
|
|
|
|
|
{ |
|
407
|
128168710
|
|
|
|
|
|
*tmpm++ = *_c++; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
|
|
410
|
101857800
|
100
|
|
|
|
|
for (; x < oldused; x++) |
|
411
|
|
|
|
|
|
|
{ |
|
412
|
89037974
|
|
|
|
|
|
*tmpm++ = 0; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
MONT_FINI; |
|
416
|
|
|
|
|
|
|
|
|
417
|
12819826
|
|
|
|
|
|
a->used = pa + 1; |
|
418
|
12819826
|
|
|
|
|
|
pstm_clamp(a); |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
/* reuse x as return code */ |
|
421
|
12819826
|
|
|
|
|
|
x = PSTM_OKAY; |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
/* if A >= m then A = A - m */ |
|
424
|
12819826
|
100
|
|
|
|
|
if (pstm_cmp_mag(a, m) != PSTM_LT) |
|
425
|
|
|
|
|
|
|
{ |
|
426
|
1637
|
50
|
|
|
|
|
if (pstm_sub_s(a, m, a) != PSTM_OKAY) |
|
427
|
|
|
|
|
|
|
{ |
|
428
|
0
|
|
|
|
|
|
x = PS_MEM_FAIL; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
} |
|
431
|
12819826
|
50
|
|
|
|
|
if (c && c != paD) |
|
|
|
50
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
{ |
|
433
|
0
|
|
|
|
|
|
psFree(c, pool); |
|
434
|
|
|
|
|
|
|
} |
|
435
|
12819826
|
|
|
|
|
|
return x; |
|
436
|
|
|
|
|
|
|
} |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
#endif /* defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) || defined(USE_MATRIX_DH) */
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
/******************************************************************************/ |
|
441
|
|
|
|
|
|
|
|