Line data Source code
1 : /* Modular multiplication.
2 :
3 : Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 : Paul Zimmermann, Alexander Kruppa and Cyril Bouvier.
5 :
6 : This file is part of the ECM Library.
7 :
8 : The ECM Library is free software; you can redistribute it and/or modify
9 : it under the terms of the GNU Lesser General Public License as published by
10 : the Free Software Foundation; either version 3 of the License, or (at your
11 : option) any later version.
12 :
13 : The ECM Library is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 : License for more details.
17 :
18 : You should have received a copy of the GNU Lesser General Public License
19 : along with the ECM Library; see the file COPYING.LIB. If not, see
20 : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 :
23 : #include <stdio.h>
24 : #include <stdlib.h>
25 : #include "ecm-gmp.h"
26 : #include "ecm-impl.h"
27 : #include "mpmod.h"
28 :
29 : #ifdef USE_ASM_REDC
30 : #include "mulredc.h"
31 : #endif
32 :
33 : FILE *ECM_STDOUT, *ECM_STDERR; /* define them here since needed in tune.c */
34 :
35 : /* define WANT_ASSERT to check normalization of residues */
36 : /* #define WANT_ASSERT 1 */
37 : /* #define DEBUG */
38 : /* #define WANT_ASSERT_EXPENSIVE 1 */
39 :
40 : #define ASSERT_NORMALIZED(x) ASSERT ((modulus->repr != ECM_MOD_MODMULN && \
41 : modulus->repr != ECM_MOD_REDC) || \
42 : mpz_size (x) <= mpz_size (modulus->orig_modulus))
43 : #define MPZ_NORMALIZED(x) ASSERT (PTR(x)[ABSIZ(x)-1] != 0)
44 :
45 :
46 :
47 : static void ecm_redc_basecase (mpz_ptr, mpz_ptr, mpmod_t) ATTRIBUTE_HOT;
48 : static void ecm_mulredc_basecase (mpres_t, const mpres_t, const mpres_t,
49 : mpmod_t) ATTRIBUTE_HOT;
50 : static void base2mod (mpres_t, const mpres_t, mpres_t, mpmod_t) ATTRIBUTE_HOT;
51 : static void REDC (mpres_t, const mpres_t, mpz_t, mpmod_t);
52 :
53 : /* returns +/-l if n is a factor of N = 2^l +/- 1 with N <= n^threshold,
54 : 0 otherwise.
55 : */
56 : int
57 1590 : isbase2 (const mpz_t n, const double threshold)
58 : {
59 : unsigned int k, lo;
60 1590 : int res = 0;
61 : mpz_t u, w;
62 :
63 1590 : mpz_init (u);
64 1590 : mpz_init (w);
65 1590 : lo = mpz_sizeinbase (n, 2) - 1; /* 2^lo <= n < 2^(lo+1) */
66 1590 : mpz_set_ui (u, 1UL);
67 1590 : mpz_mul_2exp (u, u, 2UL * lo);
68 1590 : mpz_mod (w, u, n); /* 2^(2lo) mod n = -/+2^(2lo-l) if m*n = 2^l+/-1 */
69 1590 : if (mpz_cmp_ui (w, 1UL) == 0) /* if 2^(2lo) mod n = 1, then n divides
70 : 2^(2lo)-1. If algebraic factors have been
71 : removed, n divides either 2^lo+1 or 2^lo-1.
72 : But since n has lo+1 bits, n can only divide
73 : 2^lo+1. More precisely, n must be 2^lo+1. */
74 : {
75 : /* check that n equals 2^lo+1. Since n divides 2^(2lo)-1, n is odd. */
76 38 : if (mpz_scan1 (n, 1UL) != lo)
77 7 : lo = 0;
78 38 : mpz_clear (w);
79 38 : mpz_clear (u);
80 38 : return lo;
81 : }
82 1552 : k = mpz_sizeinbase (w, 2) - 1;
83 : /* if w = 2^k then n divides 2^(2*lo-k)-1 */
84 1552 : mpz_set_ui (u, 1UL);
85 1552 : mpz_mul_2exp (u, u, k);
86 1552 : if (mpz_cmp(w, u) == 0)
87 119 : res = k - 2 * lo;
88 : else /* if w = -2^k then n divides 2^(2*lo-k)+1 */
89 : {
90 1433 : mpz_neg (w, w);
91 1433 : mpz_mod (w, w, n);
92 1433 : k = mpz_sizeinbase (w, 2) - 1;
93 1433 : mpz_set_ui (u, 1UL);
94 1433 : mpz_mul_2exp (u, u, k);
95 1433 : if (mpz_cmp (w, u) == 0)
96 57 : res = 2 * lo - k;
97 : }
98 1552 : mpz_clear (u);
99 1552 : mpz_clear (w);
100 :
101 1552 : if (abs (res) > (int) (threshold * (double) lo))
102 24 : res = 0;
103 :
104 1552 : if (abs (res) < 16)
105 1400 : res = 0;
106 :
107 1552 : return res;
108 : }
109 :
110 : /* Do base-2 reduction. R must not equal S or t. */
111 : static void
112 29668619 : base2mod (mpres_t R, const mpres_t S, mpres_t t, mpmod_t modulus)
113 : {
114 29668619 : unsigned long absbits = abs (modulus->bits);
115 :
116 : ASSERT (R != S && R != t);
117 29668619 : mpz_tdiv_q_2exp (R, S, absbits);
118 29668619 : mpz_tdiv_r_2exp (t, S, absbits);
119 29668619 : if (modulus->bits < 0)
120 16326241 : mpz_add (R, R, t);
121 : else
122 13342378 : mpz_sub (R, t, R);
123 :
124 : /* mpz_mod (R, R, modulus->orig_modulus); */
125 38017414 : while (mpz_sizeinbase (R, 2) > absbits)
126 : {
127 8348795 : mpz_tdiv_q_2exp (t, R, absbits);
128 8348795 : mpz_tdiv_r_2exp (R, R, absbits);
129 8348795 : if (modulus->bits < 0)
130 6133610 : mpz_add (R, R, t);
131 : else
132 2215185 : mpz_sub (R, R, t);
133 : }
134 29668619 : }
135 :
136 : /* Modular reduction modulo the Fermat number 2^m+1.
137 : n = m / GMP_NUMB_BITS. Result is < 2^m+1.
138 : FIXME: this does not work with nails.
139 : Only copies the data to R if reduction is needed and returns 1 in that
140 : case. If the value in S is reduced already, nothing is done and 0 is
141 : returned. Yes, this is ugly. */
142 : static int
143 68053 : base2mod_2 (mpres_t R, const mpres_t S, mp_size_t n, mpz_t modulus)
144 : {
145 : mp_size_t s;
146 :
147 68053 : s = ABSIZ(S);
148 68053 : if (s > n)
149 : {
150 10565 : if (s == n + 1)
151 : {
152 10565 : mp_srcptr sp = PTR(S);
153 : mp_ptr rp;
154 :
155 10565 : MPZ_REALLOC (R, s);
156 10565 : rp = PTR(R);
157 :
158 10565 : if ((rp[n] = mpn_sub_1 (rp, sp, n, sp[n])))
159 0 : rp[n] = mpn_add_1 (rp, rp, n, rp[n]);
160 21130 : MPN_NORMALIZE(rp, s);
161 : ASSERT (s <= n || (s == n && rp[n] == 1));
162 10565 : SIZ(R) = (SIZ(S) > 0) ? (int) s : (int) -s;
163 : }
164 : else /* should happen rarely */
165 0 : mpz_mod (R, S, modulus);
166 :
167 10565 : return 1;
168 : }
169 :
170 57488 : return 0;
171 : }
172 :
173 : /* subquadratic REDC, at mpn level.
174 : {orig,n} is the original modulus.
175 : Requires xn = 2n or 2n-1 and ABSIZ(orig_modulus)=n.
176 : */
177 : static void
178 100007046 : ecm_redc_n (mp_ptr rp, mp_srcptr x0p, mp_size_t xn,
179 : mp_srcptr orig, mp_srcptr invm, mp_size_t n)
180 : {
181 : mp_ptr tp, up, xp;
182 100007046 : mp_size_t nn = n + n;
183 : mp_limb_t cy, cin;
184 : TMP_DECL;
185 :
186 : ASSERT((xn == nn) || (xn == nn - 1));
187 :
188 : TMP_MARK;
189 100007046 : up = TMP_ALLOC_LIMBS(nn + nn);
190 100007046 : if (xn < nn)
191 : {
192 0 : xp = TMP_ALLOC_LIMBS(nn);
193 0 : MPN_COPY (xp, x0p, xn);
194 0 : xp[nn - 1] = 0;
195 : }
196 : else
197 100007046 : xp = (mp_ptr) x0p;
198 : #ifdef HAVE___GMPN_MULLO_N /* available up from GMP 5.0.0 */
199 100007046 : __gmpn_mullo_n (up, xp, invm, n);
200 : #else
201 : ecm_mul_lo_n (up, xp, invm, n);
202 : #endif
203 100007046 : tp = up + nn;
204 100007046 : mpn_mul_n (tp, up, orig, n);
205 : /* add {x, 2n} and {tp, 2n}. We know that {tp, n} + {xp, n} will give
206 : either 0, or a carry out. If xp[n-1] <> 0 or tp[n-1] <> 0,
207 : then there is a carry. We use a binary OR, which sets the zero flag
208 : if and only if both operands are zero. */
209 100007046 : cin = (mp_limb_t) ((xp[n - 1] | tp[n - 1]) ? 1 : 0);
210 : #ifdef HAVE___GMPN_ADD_NC
211 100007046 : cy = __gmpn_add_nc (rp, tp + n, xp + n, n, cin);
212 : #else
213 : cy = mpn_add_n (rp, tp + n, xp + n, n);
214 : cy += mpn_add_1 (rp, rp, n, cin);
215 : #endif
216 : /* since we add at most N-1 to the upper half of {x0p,2n},
217 : one adjustment is enough */
218 100007046 : if (cy)
219 3372889 : cy -= mpn_sub_n (rp, rp, orig, n);
220 : ASSERT (cy == 0);
221 : TMP_FREE;
222 100007046 : }
223 :
224 : /* REDC. x and t must not be identical, t has limb growth */
225 : /* subquadratic REDC, at mpz level */
226 : static void
227 380122664 : REDC (mpres_t r, const mpres_t x, mpz_t t, mpmod_t modulus)
228 : {
229 380122664 : mp_size_t n = modulus->bits / GMP_NUMB_BITS;
230 380122664 : mp_size_t xn = ABSIZ(x);
231 :
232 : ASSERT (xn <= 2 * n);
233 380122664 : if (xn == 2 * n) /* ecm_redc_n also accepts xn=2n-1, but this seems slower
234 : for now (see remark in TODO) */
235 : {
236 : mp_ptr rp;
237 93019051 : MPZ_REALLOC (r, n);
238 93019051 : rp = PTR(r);
239 93019051 : ecm_redc_n (rp, PTR(x), xn, PTR(modulus->orig_modulus),
240 93019051 : PTR(modulus->aux_modulus), n);
241 93019061 : MPN_NORMALIZE(rp, n);
242 93019051 : SIZ(r) = (SIZ(x) > 0) ? (int) n : (int) -n;
243 : MPZ_NORMALIZED (r);
244 : }
245 : else
246 : {
247 287103613 : mpz_tdiv_r_2exp (t, x, modulus->bits);
248 287103613 : mpz_mul (t, t, modulus->aux_modulus);
249 287103613 : mpz_tdiv_r_2exp (t, t, modulus->bits); /* t = (x % R) * 1/N (mod R) */
250 287103613 : mpz_mul (t, t, modulus->orig_modulus); /* t <= (R-1)*N */
251 287103613 : mpz_add (t, t, x); /* t < (R-1)*N + R^2/B */
252 287103613 : mpz_tdiv_q_2exp (r, t, modulus->bits); /* r = (x + m*N) / R
253 : < N + R/B */
254 287103613 : if (ABSIZ (r) > n) /* this can happen in practice but is extremely
255 : unlikely: in particular it requires that the
256 : upper limb of N has only ones */
257 0 : mpz_sub (r, r, modulus->multiple);
258 : }
259 : ASSERT (ABSIZ(r) <= n);
260 380122664 : }
261 :
262 :
263 : /* Quadratic time redc for n word moduli. */
264 : static inline void
265 15346253 : redc_basecase_n (mp_ptr rp, mp_ptr cp, mp_srcptr np, const mp_size_t nn,
266 : const mp_ptr invm)
267 : {
268 : #ifdef HAVE___GMPN_REDC_2
269 15346253 : REDC2(rp, cp, np, nn, invm);
270 : #else /* HAVE___GMPN_REDC_2 is not defined */
271 : #ifdef HAVE___GMPN_REDC_1
272 : REDC1(rp, cp, np, nn, invm[0]);
273 : #else /* neither HAVE___GMPN_REDC_2 nor HAVE___GMPN_REDC_1 is defined */
274 : mp_limb_t cy;
275 : mp_size_t j;
276 :
277 : for (j = 0; j < nn; j++)
278 : {
279 : cy = mpn_addmul_1 (cp, np, nn, cp[0] * invm[0]);
280 : ASSERT(cp[0] == (mp_limb_t) 0);
281 : cp[0] = cy;
282 : cp++;
283 : }
284 : /* add vector of carries and shift */
285 : cy = mpn_add_n (rp, cp, cp - nn, nn);
286 : /* the result of Montgomery's REDC is less than 2^Nbits + N,
287 : thus at most one correction is enough */
288 : if (cy != 0)
289 : {
290 : mp_limb_t t;
291 : t = mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
292 : ASSERT (t == 1);
293 : }
294 : #endif /* HAVE___GMPN_REDC_1 */
295 : #endif /* HAVE___GMPN_REDC_2 */
296 15346253 : }
297 :
298 : /* r <- c/R^nn mod n, where n has nn limbs, and R=2^GMP_NUMB_BITS.
299 : n must be odd.
300 : c must have space for at least 2*nn limbs.
301 : r must have space for at least n limbs.
302 : c and r can be the same variable.
303 : The data in c is clobbered.
304 : */
305 : static void
306 15346253 : ecm_redc_basecase (mpz_ptr r, mpz_ptr c, mpmod_t modulus)
307 : {
308 : mp_ptr rp;
309 : mp_ptr cp;
310 : mp_srcptr np;
311 15346253 : mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
312 :
313 : ASSERT(ABSIZ(c) <= 2 * nn);
314 : ASSERT(ALLOC(c) >= 2 * nn);
315 : ASSERT(ALLOC(r) >= nn);
316 15346253 : cp = PTR(c);
317 15346253 : rp = PTR(r);
318 15346253 : np = PTR(modulus->orig_modulus);
319 72703007 : for (j = ABSIZ(c); j < 2 * nn; j++)
320 57356754 : cp[j] = 0;
321 :
322 15346253 : redc_basecase_n (rp, cp, np, nn, modulus->Nprim);
323 :
324 15695134 : MPN_NORMALIZE (rp, nn);
325 15346253 : SIZ(r) = SIZ(c) < 0 ? (int) -nn : (int) nn;
326 15346253 : }
327 :
328 : #ifdef USE_ASM_REDC
329 : /* Quadratic time multiplication and REDC with nn-limb modulus.
330 : x and y are nn-limb residues, the nn-limb result is written to z.
331 : This function merely calls the correct mulredc*() assembly function
332 : depending on nn, and processes any leftover carry. */
333 :
334 : static void
335 5792701534 : mulredc (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_srcptr m,
336 : const mp_size_t nn, const mp_limb_t invm)
337 : {
338 : mp_limb_t cy;
339 5792701534 : switch (nn)
340 : {
341 112427488 : case 1:
342 112427488 : cy = mulredc1(z, x[0], y[0], m[0], invm);
343 112427488 : break;
344 1365842989 : case 2:
345 1365842989 : cy = mulredc2(z, x, y, m, invm);
346 1365842989 : break;
347 3987645064 : case 3:
348 3987645064 : cy = mulredc3(z, x, y, m, invm);
349 3987645064 : break;
350 27876146 : case 4:
351 27876146 : cy = mulredc4(z, x, y, m, invm);
352 27876146 : break;
353 165038451 : case 5:
354 165038451 : cy = mulredc5(z, x, y, m, invm);
355 165038451 : break;
356 43877059 : case 6:
357 43877059 : cy = mulredc6(z, x, y, m, invm);
358 43877059 : break;
359 14966255 : case 7:
360 14966255 : cy = mulredc7(z, x, y, m, invm);
361 14966255 : break;
362 20153404 : case 8:
363 20153404 : cy = mulredc8(z, x, y, m, invm);
364 20153404 : break;
365 2255154 : case 9:
366 2255154 : cy = mulredc9(z, x, y, m, invm);
367 2255154 : break;
368 4341894 : case 10:
369 4341894 : cy = mulredc10(z, x, y, m, invm);
370 4341894 : break;
371 1882650 : case 11:
372 1882650 : cy = mulredc11(z, x, y, m, invm);
373 1882650 : break;
374 1082076 : case 12:
375 1082076 : cy = mulredc12(z, x, y, m, invm);
376 1082076 : break;
377 147589 : case 13:
378 147589 : cy = mulredc13(z, x, y, m, invm);
379 147589 : break;
380 42685654 : case 14:
381 42685654 : cy = mulredc14(z, x, y, m, invm);
382 42685654 : break;
383 159339 : case 15:
384 159339 : cy = mulredc15(z, x, y, m, invm);
385 159339 : break;
386 153379 : case 16:
387 153379 : cy = mulredc16(z, x, y, m, invm);
388 153379 : break;
389 140797 : case 17:
390 140797 : cy = mulredc17(z, x, y, m, invm);
391 140797 : break;
392 154383 : case 18:
393 154383 : cy = mulredc18(z, x, y, m, invm);
394 154383 : break;
395 682389 : case 19:
396 682389 : cy = mulredc19(z, x, y, m, invm);
397 682389 : break;
398 1189374 : case 20:
399 1189374 : cy = mulredc20(z, x, y, m, invm);
400 1189374 : break;
401 0 : default:
402 0 : abort();
403 : }
404 : /* the result of Montgomery's REDC is less than 2^Nbits + N,
405 : thus at most one correction is enough */
406 5792701534 : if (cy != 0)
407 : {
408 : ATTRIBUTE_UNUSED mp_limb_t t;
409 11006143 : t = mpn_sub_n (z, z, m, nn); /* a borrow should always occur here */
410 : ASSERT (t == 1);
411 : }
412 5792701534 : }
413 :
414 : #if 0
415 : /* {rp, n} <- {ap, n}^2/B^n mod {np, n} where B = 2^GMP_NUMB_BITS */
416 : static void
417 : sqrredc (mp_ptr rp, mp_srcptr ap, mp_srcptr np, const mp_size_t n,
418 : const mp_limb_t invm)
419 : {
420 : mp_ptr cp;
421 : mp_size_t i;
422 : mp_limb_t cy, q;
423 : TMP_DECL;
424 :
425 : TMP_MARK;
426 : cp = TMP_ALLOC_LIMBS(2*n);
427 : for (i = 0; i < n; i++)
428 : umul_ppmm (cp[2*i+1], cp[2*i], ap[i], ap[i]);
429 :
430 : if (UNLIKELY(n == 1))
431 : {
432 : q = cp[0] * invm;
433 : rp[0] = mpn_addmul_1 (cp, np, 1, q);
434 : cy = mpn_add_n (rp, rp, cp + 1, 1);
435 : goto end_sqrredc;
436 : }
437 :
438 : if (cp[0] & (mp_limb_t) 1)
439 : /* cp[n] is either some ap[i]^2 mod B or floor(ap[i]^2/B),
440 : the latter is at most floor((B-1)^2/B) = B-2, and the former cannot be
441 : B-1 since -1 is not a square mod 2^n for n >1, thus there is no carry
442 : in cp[n] + ... below */
443 : cp[n] += mpn_add_n (cp, cp, np, n);
444 : /* now {cp, 2n} is even: divide by two */
445 : mpn_rshift (cp, cp, 2*n, 1);
446 : /* now cp[2n-1] is at most B/2-1 */
447 :
448 : for (i = 0; i < n - 1; i++)
449 : {
450 : q = cp[i] * invm;
451 : cp[i] = mpn_addmul_1 (cp + i, np, n, q);
452 : /* accumulate ap[i+1..n-1] * ap[i] */
453 : rp[i] = mpn_addmul_1 (cp + 2 * i + 1, ap + i + 1, n - 1 - i, ap[i]);
454 : }
455 : /* the last iteration did set cp[n-2] to zero, accumulated a[n-1] * a[n-2] */
456 :
457 : /* cp[2n-1] was untouched so far, so it is still at most B/2-1 */
458 : q = cp[n-1] * invm;
459 : rp[n-1] = mpn_addmul_1 (cp + n - 1, np, n, q);
460 : /* rp[n-1] <= floor((B^n-1)*(B-1)/B^n)<=B-2 */
461 :
462 : /* now add {rp, n}, {cp+n, n} and {cp, n-1} */
463 : /* cp[2n-1] still <= B/2-1 */
464 : rp[n-1] += mpn_add_n (rp, rp, cp, n-1); /* no overflow in rp[n-1] + ... */
465 : cy = mpn_add_n (rp, rp, cp + n, n);
466 : /* multiply by 2 */
467 : cy = (cy << 1) + mpn_lshift (rp, rp, n, 1);
468 : end_sqrredc:
469 : while (cy)
470 : cy -= mpn_sub_n (rp, rp, np, n);
471 : TMP_FREE;
472 : }
473 : #endif
474 :
475 : #ifdef HAVE_NATIVE_MULREDC1_N
476 : /* Multiplies y by the 1-limb value of x and does modulo reduction.
477 : The resulting residue may be multiplied by some constant,
478 : which makes this function useful only for cases where, e.g.,
479 : all projective coordinates are multiplied by the same constant.
480 : More precisely it computes:
481 : {z, N} = {y, N} * x / 2^GMP_NUMB_BITS mod {m, N}
482 : */
483 : static void
484 12724368 : mulredc_1 (mp_ptr z, const mp_limb_t x, mp_srcptr y, mp_srcptr m,
485 : const mp_size_t N, const mp_limb_t invm)
486 : {
487 : mp_limb_t cy;
488 :
489 12724368 : switch (N) {
490 519496 : case 1:
491 519496 : cy = mulredc1(z, x, y[0], m[0], invm);
492 519496 : break;
493 381000 : case 2:
494 381000 : cy = mulredc1_2(z, x, y, m, invm);
495 381000 : break;
496 0 : case 3:
497 0 : cy = mulredc1_3(z, x, y, m, invm);
498 0 : break;
499 115568 : case 4:
500 115568 : cy = mulredc1_4(z, x, y, m, invm);
501 115568 : break;
502 0 : case 5:
503 0 : cy = mulredc1_5(z, x, y, m, invm);
504 0 : break;
505 44520 : case 6:
506 44520 : cy = mulredc1_6(z, x, y, m, invm);
507 44520 : break;
508 0 : case 7:
509 0 : cy = mulredc1_7(z, x, y, m, invm);
510 0 : break;
511 0 : case 8:
512 0 : cy = mulredc1_8(z, x, y, m, invm);
513 0 : break;
514 127000 : case 9:
515 127000 : cy = mulredc1_9(z, x, y, m, invm);
516 127000 : break;
517 0 : case 10:
518 0 : cy = mulredc1_10(z, x, y, m, invm);
519 0 : break;
520 0 : case 11:
521 0 : cy = mulredc1_11(z, x, y, m, invm);
522 0 : break;
523 0 : case 12:
524 0 : cy = mulredc1_12(z, x, y, m, invm);
525 0 : break;
526 0 : case 13:
527 0 : cy = mulredc1_13(z, x, y, m, invm);
528 0 : break;
529 11536784 : case 14:
530 11536784 : cy = mulredc1_14(z, x, y, m, invm);
531 11536784 : break;
532 0 : case 15:
533 0 : cy = mulredc1_15(z, x, y, m, invm);
534 0 : break;
535 0 : case 16:
536 0 : cy = mulredc1_16(z, x, y, m, invm);
537 0 : break;
538 0 : case 17:
539 0 : cy = mulredc1_17(z, x, y, m, invm);
540 0 : break;
541 0 : case 18:
542 0 : cy = mulredc1_18(z, x, y, m, invm);
543 0 : break;
544 0 : case 19:
545 0 : cy = mulredc1_19(z, x, y, m, invm);
546 0 : break;
547 0 : case 20:
548 0 : cy = mulredc1_20(z, x, y, m, invm);
549 0 : break;
550 0 : default:
551 : {
552 0 : abort ();
553 : }
554 : }
555 : /* the result of Montgomery's REDC is less than 2^Nbits + N,
556 : thus one correction (at most) is enough */
557 12724368 : if (cy != 0)
558 : {
559 : ATTRIBUTE_UNUSED mp_limb_t t;
560 17488 : t = mpn_sub_n (z, z, m, N); /* a borrow should always occur here */
561 : ASSERT (t == 1);
562 : }
563 12724368 : }
564 : #endif /* ifdef HAVE_NATIVE_MULREDC1_N */
565 : #endif
566 :
567 : static int tune_mulredc_table[] = TUNE_MULREDC_TABLE;
568 : static int tune_sqrredc_table[] = TUNE_SQRREDC_TABLE;
569 :
570 : static void
571 6305443576 : ecm_mulredc_basecase_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
572 : mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp)
573 : {
574 : mp_limb_t cy;
575 : mp_size_t j;
576 :
577 6305443576 : if (nn <= MULREDC_ASSEMBLY_MAX)
578 : {
579 6299991154 : switch (tune_mulredc_table[nn])
580 : {
581 5447234359 : case MPMOD_MULREDC: /* use quadratic assembly mulredc */
582 : #ifdef USE_ASM_REDC
583 5447234359 : mulredc (rp, s1p, s2p, np, nn, invm[0]);
584 5447234359 : break;
585 : #endif /* otherwise go through to the next available mode */
586 415286367 : case MPMOD_MUL_REDC1: /* mpn_mul_n + __gmpn_redc_1 */
587 : #ifdef HAVE___GMPN_REDC_1
588 415286367 : mpn_mul_n (tmp, s1p, s2p, nn);
589 415286367 : REDC1(rp, tmp, np, nn, invm[0]);
590 415286367 : break;
591 : #endif /* otherwise go through to the next available mode */
592 218735214 : case MPMOD_MUL_REDC2: /* mpn_mul_n + __gmpn_redc_2 */
593 : #ifdef HAVE___GMPN_REDC_2
594 218735214 : mpn_mul_n (tmp, s1p, s2p, nn);
595 218735214 : REDC2(rp, tmp, np, nn, invm);
596 218735214 : break;
597 : #endif /* otherwise go through to the next available mode */
598 218735214 : case MPMOD_MUL_REDCN: /* mpn_mul_n + __gmpn_redc_n */
599 : /* disable redc_n for now, since it uses the opposite
600 : precomputed inverse wrt redc_1 and redc_2 */
601 : #ifdef HAVE___GMPN_REDC_Nxxx
602 : mpn_mul_n (tmp, s1p, s2p, nn);
603 : __gmpn_redc_n (rp, tmp, np, nn, invm);
604 : break;
605 : #endif /* otherwise go through to the next available mode */
606 : case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */
607 218735214 : mpn_mul_n (tmp, s1p, s2p, nn);
608 958296976 : for (j = 0; j < nn; j++, tmp++)
609 739561762 : tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]);
610 218735214 : cy = mpn_add_n (rp, tmp - nn, tmp, nn);
611 218735214 : if (cy != 0)
612 829852 : mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
613 218735214 : break;
614 0 : default:
615 : {
616 0 : outputf (OUTPUT_ERROR, "Invalid mulredc mode: %d\n",
617 : tune_mulredc_table[nn]);
618 0 : exit (EXIT_FAILURE);
619 : }
620 : }
621 : }
622 : else /* nn > MULREDC_ASSEMBLY_MAX */
623 : {
624 5452422 : mpn_mul_n (tmp, s1p, s2p, nn);
625 5452422 : ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn);
626 : }
627 6305443576 : }
628 :
629 : static void
630 1498234598 : ecm_sqrredc_basecase_n (mp_ptr rp, mp_srcptr s1p,
631 : mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp)
632 : {
633 : mp_limb_t cy;
634 : mp_size_t j;
635 :
636 1498234598 : if (nn <= MULREDC_ASSEMBLY_MAX)
637 : {
638 1496699025 : switch (tune_sqrredc_table[nn])
639 : {
640 345467175 : case MPMOD_MULREDC: /* use quadratic assembly mulredc */
641 : #ifdef USE_ASM_REDC
642 345467175 : mulredc (rp, s1p, s1p, np, nn, invm[0]);
643 345467175 : break;
644 : #endif /* otherwise go through to the next available mode */
645 901470860 : case MPMOD_MUL_REDC1: /* mpn_sqr + __gmpn_redc_1 */
646 : #ifdef HAVE___GMPN_REDC_1
647 901470860 : mpn_sqr (tmp, s1p, nn);
648 901470860 : REDC1(rp, tmp, np, nn, invm[0]);
649 901470860 : break;
650 : #endif /* otherwise go through to the next available mode */
651 137662930 : case MPMOD_MUL_REDC2: /* mpn_sqr + __gmpn_redc_2 */
652 : #ifdef HAVE___GMPN_REDC_2
653 137662930 : mpn_sqr (tmp, s1p, nn);
654 137662930 : REDC2(rp, tmp, np, nn, invm);
655 137662930 : break;
656 : #endif /* otherwise go through to the next available mode */
657 112098060 : case MPMOD_MUL_REDCN: /* mpn_sqr + __gmpn_redc_n */
658 : /* disable redc_n for now, since it uses the opposite
659 : precomputed inverse wrt redc_1 and redc_2 */
660 : #ifdef HAVE___GMPN_REDC_Nxxx
661 : mpn_sqr (tmp, s1p, nn);
662 : __gmpn_redc_n (rp, tmp, np, nn, invm);
663 : break;
664 : #endif /* otherwise go through to the next available mode */
665 : case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */
666 112098060 : mpn_sqr (tmp, s1p, nn);
667 521495067 : for (j = 0; j < nn; j++, tmp++)
668 409397007 : tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]);
669 112098060 : cy = mpn_add_n (rp, tmp - nn, tmp, nn);
670 112098060 : if (cy != 0)
671 1529587 : mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
672 112098060 : break;
673 0 : default:
674 : {
675 0 : outputf (OUTPUT_ERROR, "Invalid sqrredc mode: %d\n",
676 : tune_sqrredc_table[nn]);
677 0 : exit (EXIT_FAILURE);
678 : }
679 : }
680 : }
681 : else /* nn > MULREDC_ASSEMBLY_MAX */
682 : {
683 1535573 : mpn_sqr (tmp, s1p, nn);
684 1535573 : ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn);
685 : }
686 1498234598 : }
687 :
688 : /* R <- S1 * S2 mod modulus
689 : i.e. R <- S1*S2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS.
690 : Same as ecm_redc_basecase previous, but combined with mul
691 : Neither input argument must be in modulus->temp1
692 : */
693 : static void
694 6250151104 : ecm_mulredc_basecase (mpres_t R, const mpres_t S1, const mpres_t S2,
695 : mpmod_t modulus)
696 : {
697 6250151104 : mp_ptr s1p, s2p, rp = PTR(R);
698 6250151104 : mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
699 :
700 : ASSERT(ALLOC(R) >= nn);
701 : ASSERT(ALLOC(S1) >= nn);
702 : ASSERT(ALLOC(S2) >= nn);
703 6250151104 : s1p = PTR(S1);
704 6250151104 : s2p = PTR(S2);
705 : /* FIXME: S1 and S2 are input and marked const, we mustn't write to them */
706 6338645174 : for (j = ABSIZ(S1); j < nn; j++)
707 88494070 : s1p[j] = 0;
708 6321664022 : for (j = ABSIZ(S2); j < nn; j++)
709 71512918 : s2p[j] = 0;
710 :
711 6250151104 : ecm_mulredc_basecase_n (rp, s1p, s2p, PTR(modulus->orig_modulus), nn,
712 : modulus->Nprim, PTR(modulus->temp1));
713 :
714 6359618808 : MPN_NORMALIZE (rp, nn);
715 6250151104 : SIZ(R) = (SIZ(S1)*SIZ(S2)) < 0 ? (int) -nn : (int) nn;
716 6250151104 : }
717 :
718 : /* R <- S1^2 mod modulus
719 : i.e. R <- S1^2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS.
720 : Same as ecm_redc_basecase previous, but combined with sqr
721 : The input argument must not be in modulus->temp1 */
722 : static void
723 1443821126 : ecm_sqrredc_basecase (mpres_t R, const mpres_t S1, mpmod_t modulus)
724 : {
725 : mp_ptr rp;
726 : mp_ptr s1p;
727 1443821126 : mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
728 :
729 : ASSERT(ALLOC(R) >= nn);
730 : ASSERT(ALLOC(S1) >= nn);
731 1443821126 : rp = PTR(R);
732 1443821126 : s1p = PTR(S1);
733 : /* FIXME: S1 is input and marked const, we mustn't write to it */
734 1481815739 : for (j = ABSIZ(S1); j < nn; j++)
735 37994613 : s1p[j] = 0;
736 :
737 1443821126 : ecm_sqrredc_basecase_n (rp, s1p, PTR(modulus->orig_modulus), nn,
738 : modulus->Nprim, PTR(modulus->temp1));
739 :
740 1484319059 : MPN_NORMALIZE (rp, nn);
741 1443821126 : SIZ(R) = (int) nn;
742 1443821126 : }
743 :
744 : /* If the user asked for a particular representation, always use it.
745 : If repr = ECM_MOD_DEFAULT, use the thresholds.
746 : Don't use base2 if repr = ECM_MOD_NOBASE2.
747 : If a value is <= -16 or >= 16, it is a base2 exponent.
748 : Return a non-zero value if an error occurred.
749 : */
750 : int
751 2511 : mpmod_init (mpmod_t modulus, const mpz_t N, int repr)
752 : {
753 2511 : int base2 = 0, r = 0;
754 2511 : mp_size_t n = mpz_size (N);
755 :
756 2511 : switch (repr)
757 : {
758 1479 : case ECM_MOD_DEFAULT:
759 1479 : if ((base2 = isbase2 (N, BASE2_THRESHOLD)))
760 : {
761 167 : repr = ECM_MOD_BASE2;
762 167 : break;
763 : }
764 : /* else go through */
765 : #if defined( __GNUC__ ) && __GNUC__ >= 7 && !defined(__ICC)
766 : __attribute__ ((fallthrough));
767 : #endif
768 : case ECM_MOD_NOBASE2:
769 1339 : if (mpz_size (N) < MPZMOD_THRESHOLD)
770 1328 : repr = ECM_MOD_MODMULN;
771 11 : else if (mpz_size (N) < REDC_THRESHOLD)
772 11 : repr = ECM_MOD_MPZ;
773 : else
774 0 : repr = ECM_MOD_REDC;
775 : }
776 :
777 : /* now repr is {ECM_MOD_BASE2, ECM_MOD_MODMULN, ECM_MOD_MPZ, ECM_MOD_REDC},
778 : or |repr| >= 16. */
779 :
780 2511 : switch (repr)
781 : {
782 343 : case ECM_MOD_MPZ:
783 343 : outputf (OUTPUT_VERBOSE, "Using mpz_mod\n");
784 343 : mpmod_init_MPZ (modulus, N);
785 343 : break;
786 1739 : case ECM_MOD_MODMULN:
787 1739 : outputf (OUTPUT_VERBOSE, "Using MODMULN [mulredc:%d, sqrredc:%d]\n",
788 : (n <= MULREDC_ASSEMBLY_MAX) ? tune_mulredc_table[n] : 4,
789 : (n <= MULREDC_ASSEMBLY_MAX) ? tune_sqrredc_table[n] : 4);
790 1739 : mpmod_init_MODMULN (modulus, N);
791 1739 : break;
792 242 : case ECM_MOD_REDC:
793 242 : outputf (OUTPUT_VERBOSE, "Using REDC\n");
794 242 : mpmod_init_REDC (modulus, N);
795 242 : break;
796 187 : default: /* base2 case: either repr=ECM_MOD_BASE2, and base2 was
797 : determined above, or |repr| >= 16, and we want base2 = repr */
798 187 : if (repr != ECM_MOD_BASE2)
799 20 : base2 = repr;
800 187 : r = mpmod_init_BASE2 (modulus, base2, N);
801 187 : break;
802 : }
803 :
804 2511 : return r;
805 : }
806 :
807 : void
808 2890706 : mpres_clear (mpres_t a, ATTRIBUTE_UNUSED const mpmod_t modulus)
809 : {
810 2890706 : mpz_clear (a);
811 2890706 : PTR(a) = NULL; /* Make sure we segfault if we access it again */
812 2890706 : }
813 :
814 : void
815 424 : mpmod_init_MPZ (mpmod_t modulus, const mpz_t N)
816 : {
817 : size_t n;
818 :
819 424 : mpz_init_set (modulus->orig_modulus, N);
820 424 : modulus->repr = ECM_MOD_MPZ;
821 :
822 424 : n = mpz_size (N); /* number of limbs of N */
823 424 : modulus->bits = n * GMP_NUMB_BITS; /* Number of bits,
824 : rounded up to full limb */
825 :
826 424 : mpz_init2 (modulus->temp1, 2UL * modulus->bits + GMP_NUMB_BITS);
827 424 : mpz_init2 (modulus->temp2, modulus->bits);
828 424 : mpz_init2 (modulus->aux_modulus, modulus->bits);
829 424 : mpz_set_ui (modulus->aux_modulus, 1UL);
830 : /* we precompute B^(n + ceil(n/2)) mod N, where B=2^GMP_NUMB_BITS */
831 424 : mpz_mul_2exp (modulus->aux_modulus, modulus->aux_modulus,
832 424 : (n + (n + 1) / 2) * GMP_NUMB_BITS);
833 424 : mpz_mod (modulus->aux_modulus, modulus->aux_modulus, N);
834 :
835 424 : return;
836 : }
837 :
838 : int
839 187 : mpmod_init_BASE2 (mpmod_t modulus, const int base2, const mpz_t N)
840 : {
841 : int Nbits;
842 :
843 187 : outputf (OUTPUT_VERBOSE,
844 : "Using special division for factor of 2^%d%c1\n",
845 : abs (base2), (base2 < 0) ? '-' : '+');
846 187 : mpz_init_set (modulus->orig_modulus, N);
847 187 : modulus->repr = ECM_MOD_BASE2;
848 187 : modulus->bits = base2;
849 :
850 187 : Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded
851 : up to full limb */
852 :
853 187 : mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS);
854 187 : mpz_init2 (modulus->temp2, Nbits);
855 :
856 187 : mpz_set_ui (modulus->temp1, 1UL);
857 187 : mpz_mul_2exp (modulus->temp1, modulus->temp1, abs (base2));
858 187 : if (base2 < 0)
859 123 : mpz_sub_ui (modulus->temp1, modulus->temp1, 1UL);
860 : else
861 64 : mpz_add_ui (modulus->temp1, modulus->temp1, 1UL);
862 187 : if (!mpz_divisible_p (modulus->temp1, N))
863 : {
864 10 : outputf (OUTPUT_ERROR, "mpmod_init_BASE2: n does not divide 2^%d%c1\n",
865 : abs (base2), base2 < 0 ? '-' : '+');
866 10 : mpz_clear (modulus->temp2);
867 10 : mpz_clear (modulus->temp1);
868 10 : mpz_clear (modulus->orig_modulus);
869 10 : return ECM_ERROR;
870 : }
871 :
872 177 : modulus->Fermat = 0;
873 177 : if (base2 > 0)
874 : {
875 : unsigned long i;
876 204 : for (i = base2; (i & 1) == 0; i >>= 1);
877 54 : if (i == 1)
878 : {
879 22 : modulus->Fermat = base2;
880 : }
881 : }
882 :
883 177 : return 0;
884 : }
885 :
886 : /* initialize the following fields:
887 : orig_modulus - the original modulus
888 : bits - # of bits of N, rounded up to a multiple of GMP_NUMB_BITS
889 : temp1, temp2 - auxiliary variables
890 : Nprim - -1/N mod B^n where B=2^GMP_NUMB_BITS and n = #limbs(N)
891 : R2 - (2^bits)^2 (mod N)
892 : R3 - (2^bits)^3 (mod N)
893 : multiple - smallest multiple of N >= 2^bits
894 : */
895 : void
896 1766 : mpmod_init_MODMULN (mpmod_t modulus, const mpz_t N)
897 : {
898 : int Nbits;
899 :
900 1766 : mpz_init_set (modulus->orig_modulus, N);
901 :
902 1766 : modulus->repr = ECM_MOD_MODMULN;
903 1766 : Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded
904 : up to full limb */
905 1766 : modulus->bits = Nbits;
906 :
907 1766 : mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS);
908 1766 : mpz_init2 (modulus->temp2, Nbits + 1);
909 1766 : modulus->Nprim = (mp_limb_t*) malloc (mpz_size (N) * sizeof (mp_limb_t));
910 :
911 1766 : mpz_init2 (modulus->R2, Nbits);
912 1766 : mpz_set_ui (modulus->temp1, 1UL);
913 1766 : mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits);
914 1766 : mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus);
915 : /* Now R2 = (2^bits)^2 (mod N) */
916 :
917 1766 : mpz_init2 (modulus->R3, Nbits);
918 1766 : mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits);
919 1766 : mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus);
920 : /* Now R3 = (2^bits)^3 (mod N) */
921 :
922 1766 : mpz_init2 (modulus->multiple, Nbits);
923 1766 : mpz_set_ui (modulus->temp1, 1UL);
924 1766 : mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
925 : /* compute ceil(2^bits / N) */
926 1766 : mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus);
927 1766 : mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus);
928 : /* Now multiple is the smallest multiple of N >= 2^bits */
929 :
930 1766 : mpz_set_ui (modulus->temp1, 1UL);
931 1766 : mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
932 : /* since we directly check even modulus in ecm/pm1/pp1,
933 : N is odd here, thus 1/N mod 2^Nbits always exist */
934 1766 : mpz_invert (modulus->temp2, N, modulus->temp1); /* temp2 = 1/N mod B^n */
935 1766 : mpz_sub (modulus->temp2, modulus->temp1, modulus->temp2);
936 : /* temp2 = -1/N mod B^n */
937 : /* ensure Nprim has all its n limbs correctly set, for ecm_redc_n */
938 8752 : MPN_ZERO(modulus->Nprim, mpz_size (N));
939 1766 : mpn_copyi (modulus->Nprim, PTR(modulus->temp2), ABSIZ(modulus->temp2));
940 1766 : }
941 :
942 : void
943 296 : mpmod_init_REDC (mpmod_t modulus, const mpz_t N)
944 : {
945 : mp_size_t n;
946 : int Nbits;
947 :
948 296 : mpz_init_set (modulus->orig_modulus, N);
949 :
950 296 : n = mpz_size (N);
951 296 : modulus->repr = ECM_MOD_REDC;
952 296 : Nbits = n * GMP_NUMB_BITS; /* Number of bits, rounded
953 : up to full limb */
954 296 : modulus->bits = Nbits;
955 :
956 296 : mpz_init2 (modulus->temp1, 2 * Nbits + GMP_NUMB_BITS);
957 296 : mpz_init2 (modulus->temp2, Nbits);
958 296 : mpz_init2 (modulus->aux_modulus, Nbits);
959 :
960 296 : mpz_set_ui (modulus->temp1, 1UL);
961 296 : mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
962 : /* since we directly check even modulus in ecm/pm1/pp1,
963 : N is odd here, thus 1/N mod 2^Nbits always exist */
964 296 : mpz_invert (modulus->aux_modulus, N, modulus->temp1);
965 :
966 296 : mpz_sub (modulus->aux_modulus, modulus->temp1, modulus->aux_modulus);
967 : /* ensure aux_modulus has n allocated limbs, for ecm_redc_n */
968 296 : if (ABSIZ(modulus->aux_modulus) < n)
969 : {
970 8 : _mpz_realloc (modulus->aux_modulus, n);
971 : /* in case the reallocation fails, _mpz_realloc sets the value to 0 */
972 8 : ASSERT_ALWAYS (mpz_cmp_ui (modulus->aux_modulus, 0) != 0);
973 28 : MPN_ZERO (PTR(modulus->aux_modulus) + ABSIZ(modulus->aux_modulus),
974 : n - ABSIZ(modulus->aux_modulus));
975 : }
976 :
977 296 : mpz_init2 (modulus->R2, Nbits);
978 296 : mpz_set_ui (modulus->temp1, 1UL);
979 296 : mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits);
980 296 : mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus);
981 : /* Now R2 = (2^bits)^2 (mod N) */
982 :
983 296 : mpz_init2 (modulus->R3, Nbits);
984 296 : mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits);
985 296 : mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus);
986 : /* Now R3 = (2^bits)^3 (mod N) */
987 :
988 296 : mpz_init (modulus->multiple);
989 296 : mpz_set_ui (modulus->temp1, 1UL);
990 296 : mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
991 : /* compute ceil(2^bits / N) */
992 296 : mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus);
993 296 : mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus);
994 : /* Now multiple is the largest multiple of N >= 2^bits */
995 296 : }
996 :
997 : void
998 10132 : mpmod_clear (mpmod_t modulus)
999 : {
1000 10132 : mpz_clear (modulus->orig_modulus);
1001 10132 : mpz_clear (modulus->temp1);
1002 10132 : mpz_clear (modulus->temp2);
1003 10132 : if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ)
1004 4889 : mpz_clear (modulus->aux_modulus);
1005 10132 : if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1006 : {
1007 6617 : mpz_clear (modulus->R2);
1008 6617 : mpz_clear (modulus->R3);
1009 6617 : mpz_clear (modulus->multiple);
1010 : }
1011 10132 : if (modulus->repr == ECM_MOD_MODMULN)
1012 4969 : free (modulus->Nprim);
1013 :
1014 10132 : return;
1015 : }
1016 :
1017 : /* initialize r and set all entries from those of modulus */
1018 : void
1019 7486 : mpmod_init_set (mpmod_t r, const mpmod_t modulus)
1020 : {
1021 7486 : const unsigned long Nbits = abs(modulus->bits);
1022 7486 : const unsigned long n = mpz_size (modulus->orig_modulus);
1023 :
1024 7486 : r->repr = modulus->repr;
1025 7486 : r->bits = modulus->bits;
1026 7486 : r->Fermat = modulus->Fermat;
1027 7486 : mpz_init_set (r->orig_modulus, modulus->orig_modulus);
1028 7486 : mpz_init2 (r->temp1, 2 * Nbits + GMP_NUMB_BITS);
1029 7486 : mpz_init2 (r->temp2, Nbits + GMP_NUMB_BITS);
1030 7486 : if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1031 : {
1032 4567 : mpz_init2 (r->multiple, Nbits);
1033 4567 : mpz_init2 (r->R2, Nbits);
1034 4567 : mpz_init2 (r->R3, Nbits);
1035 4567 : mpz_set (r->multiple, modulus->multiple);
1036 4567 : mpz_set (r->R2, modulus->R2);
1037 4567 : mpz_set (r->R3, modulus->R3);
1038 : }
1039 7486 : if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ)
1040 : {
1041 4177 : mpz_init2 (r->aux_modulus, Nbits);
1042 4177 : mpz_set (r->aux_modulus, modulus->aux_modulus);
1043 : /* for ECM_MOD_REDC, ensure r->aux_modulus has n limbs */
1044 4177 : _mpz_realloc (r->aux_modulus, n);
1045 4738 : MPN_ZERO (PTR(r->aux_modulus) + ABSIZ(r->aux_modulus),
1046 : n - ABSIZ(r->aux_modulus));
1047 : }
1048 7486 : if (modulus->repr == ECM_MOD_MODMULN)
1049 : {
1050 3212 : r->Nprim = (mp_limb_t*) malloc (n * sizeof (mp_limb_t));
1051 3212 : mpn_copyi (r->Nprim, modulus->Nprim, n);
1052 : }
1053 7486 : }
1054 :
1055 :
1056 : void
1057 2901620 : mpres_init (mpres_t R, const mpmod_t modulus)
1058 : {
1059 : /* use mpz_sizeinbase since modulus->bits may not be initialized yet */
1060 2901620 : mpz_init2 (R, mpz_sizeinbase (modulus->orig_modulus, 2) + GMP_NUMB_BITS);
1061 2901620 : }
1062 :
1063 : /* realloc R so that it has at least the same number of limbs as modulus */
1064 : void
1065 1485246 : mpres_realloc (mpres_t R, const mpmod_t modulus)
1066 : {
1067 1485246 : if (modulus->repr == ECM_MOD_MODMULN)
1068 696604 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1069 1485246 : }
1070 :
1071 :
1072 : /* a <- b^2 mod (modulus).
1073 : a and b might be equal, but cannot be modulus->temp1.
1074 : Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */
1075 : static inline void
1076 72933821 : mpres_pow_sqr (mpres_t a, const mpres_t b, mpmod_t modulus)
1077 : {
1078 : /* Set temp2 = temp2*temp2 */
1079 72933821 : if (modulus->repr == ECM_MOD_BASE2)
1080 : {
1081 4678530 : mpz_mul (modulus->temp1, b, b);
1082 4678530 : base2mod (a, modulus->temp1, modulus->temp1, modulus);
1083 : }
1084 68255291 : else if (modulus->repr == ECM_MOD_MODMULN)
1085 34128499 : ecm_sqrredc_basecase (a, b, modulus);
1086 : else
1087 : {
1088 : ASSERT(modulus->repr == ECM_MOD_REDC);
1089 34126792 : mpz_mul (modulus->temp1, b, b);
1090 34126792 : REDC (a, modulus->temp1, modulus->temp2, modulus);
1091 : }
1092 72933821 : }
1093 :
1094 : /* a <- b*c mod (modulus)
1095 : a, b, c must not equal modulus->temp1.
1096 : Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */
1097 : static inline void
1098 9446697 : mpres_pow_mul (mpres_t a, const mpres_t b, const mpres_t c, mpmod_t modulus)
1099 : {
1100 : /* Set temp2 = temp2*temp2 */
1101 9446697 : if (modulus->repr == ECM_MOD_BASE2)
1102 : {
1103 459612 : mpz_mul (modulus->temp1, b, c);
1104 459612 : base2mod (a, modulus->temp1, modulus->temp1, modulus);
1105 : }
1106 8987085 : else if (modulus->repr == ECM_MOD_MODMULN)
1107 4493679 : ecm_mulredc_basecase (a, b, c, modulus);
1108 : else
1109 : {
1110 : ASSERT(modulus->repr == ECM_MOD_REDC);
1111 4493406 : mpz_mul (modulus->temp1, b, c);
1112 4493406 : REDC (a, modulus->temp1, modulus->temp2, modulus);
1113 : }
1114 9446697 : }
1115 :
1116 : /* R <- BASE^EXP mod modulus.
1117 : Warning: EXP can be negative (only occurs for P-1 in revision 2982).
1118 : */
1119 : void
1120 5344105 : mpres_pow (mpres_t R, const mpres_t BASE, const mpz_t EXP, mpmod_t modulus)
1121 : {
1122 : size_t k, expnbits, K, lw, n0, i;
1123 : mpres_t *B;
1124 : mp_limb_t w;
1125 :
1126 : ASSERT_NORMALIZED(BASE);
1127 :
1128 5344105 : if (modulus->repr == ECM_MOD_MPZ)
1129 5284533 : mpz_powm (R, BASE, EXP, modulus->orig_modulus);
1130 59572 : else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN ||
1131 20320 : modulus->repr == ECM_MOD_REDC)
1132 : {
1133 : size_t expidx;
1134 : mp_limb_t bitmask, expbits;
1135 59572 : int sgn = mpz_sgn (EXP);
1136 : mpz_t exp;
1137 :
1138 : /* case EXP=0 */
1139 59572 : if (sgn == 0)
1140 : {
1141 105 : mpres_set_ui (R, 1UL, modulus); /* set result to 1 */
1142 : ASSERT_NORMALIZED (R);
1143 105 : return;
1144 : }
1145 :
1146 : /* exp <- |EXP| */
1147 59467 : PTR(exp) = PTR(EXP);
1148 59467 : SIZ(exp) = ABSIZ(EXP);
1149 :
1150 59467 : expidx = mpz_size (exp) - 1; /* point at most significant limb */
1151 59467 : expbits = mpz_getlimbn (exp, expidx); /* get most significant limb */
1152 : ASSERT (expbits != 0);
1153 :
1154 : /* Scan for the MSB in expbits */
1155 59467 : bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1);
1156 2109731 : for (; (bitmask & expbits) == 0; bitmask >>= 1);
1157 :
1158 : /* here the most significant limb with any set bits is in expbits, */
1159 : /* bitmask is set to mask in the msb of expbits */
1160 :
1161 59467 : k = 1; /* sliding window size */
1162 59467 : expnbits = mpz_sizeinbase (exp, 2);
1163 : /* the average number of multiplications is 2^(k-1) + expnbits / (k+1) */
1164 291875 : while ((1 << (k-1)) + expnbits / (k + 1) > (1 << k) + expnbits / (k + 2))
1165 232408 : k ++;
1166 : /* precompute BASE^i for i = 2, 3, 5, ..., 2^k-1 */
1167 59467 : K = 1UL << (k - 1);
1168 59467 : B = malloc (K * sizeof (mpres_t));
1169 1543665 : for (i = 0; i < K; i++)
1170 : {
1171 1484198 : mpres_init (B[i], modulus);
1172 1484198 : mpres_realloc (B[i], modulus);
1173 1484198 : if (i == 0) /* store BASE^2 in B[0] */
1174 59467 : mpres_pow_sqr (B[i], BASE, modulus);
1175 1424731 : else if (i == 1)
1176 58955 : mpres_pow_mul (B[i], B[0], BASE, modulus);
1177 : else
1178 1365776 : mpres_pow_mul (B[i], B[i-1], B[0], modulus);
1179 : }
1180 :
1181 59467 : mpz_set (modulus->temp2, BASE);
1182 59467 : bitmask >>= 1;
1183 59467 : w = 0; /* invariant: temp2 has to be multiplied by BASE^w */
1184 59467 : lw = 0; /* number of bits in w */
1185 59467 : expnbits --; /* number of remaining bits to deal with */
1186 59467 : n0 = 0; /* smallest bit index of current window */
1187 :
1188 : while (1)
1189 : {
1190 57867979 : for ( ; bitmask != 0; bitmask >>= 1, expnbits--)
1191 : {
1192 : /* Set temp2 = temp2 ^ 2 */
1193 56945245 : mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus);
1194 56945245 : w = w << 1;
1195 56945245 : lw += (lw != 0);
1196 :
1197 : /* If bit is 1, accumulate in w */
1198 56945245 : if (expbits & bitmask)
1199 : {
1200 28487159 : w ++;
1201 28487159 : if (lw == 0)
1202 : {
1203 8021966 : lw = 1;
1204 : /* n0 points to the smallest bit of the current window */
1205 8021966 : n0 = (expnbits >= k) ? expnbits - k : 0;
1206 : }
1207 : /* if the current bit is the smallest one of the window,
1208 : we multiply by BASE^w */
1209 28487159 : if (mpz_scan1 (exp, n0) == expnbits - 1)
1210 : {
1211 : ASSERT(w/2 < K);
1212 8021966 : mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2],
1213 8021966 : modulus->temp2, modulus);
1214 8021966 : w = lw = 0;
1215 : }
1216 : }
1217 : }
1218 922734 : if (expidx == 0) /* if we just processed the least */
1219 59467 : break; /* significant limb, we are done */
1220 863267 : expidx --;
1221 863267 : expbits = mpz_getlimbn (exp, expidx);
1222 863267 : bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
1223 : }
1224 : ASSERT(w/2 < K);
1225 59467 : if (w != 0)
1226 0 : mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2],
1227 0 : modulus->temp2, modulus);
1228 1543665 : for (i = 0; i < K; i++)
1229 1484198 : mpres_clear (B[i], modulus);
1230 59467 : free (B);
1231 59467 : mpz_set (R, modulus->temp2);
1232 :
1233 : /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|.
1234 : If EXP was negative, do a modular inverse */
1235 59467 : if (sgn < 0)
1236 202 : mpres_invert (R, R, modulus);
1237 : } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */
1238 : ASSERT_NORMALIZED (R);
1239 : }
1240 :
1241 :
1242 : /* Returns 1 if S == 0 (mod modulus), 0 otherwise */
1243 :
1244 : int
1245 340787144 : mpres_is_zero (const mpres_t S, mpmod_t modulus)
1246 : {
1247 340787144 : mpz_mod (modulus->temp1, S, modulus->orig_modulus);
1248 : /* For all currently implemented representations, a zero residue has zero
1249 : integer representation */
1250 340787144 : return (mpz_sgn (modulus->temp1) == 0) ? 1 : 0;
1251 : }
1252 :
1253 : /* R <- BASE^EXP mod modulus */
1254 : void
1255 247 : mpres_ui_pow (mpres_t R, const unsigned long BASE, const mpres_t EXP,
1256 : mpmod_t modulus)
1257 : {
1258 247 : if (modulus->repr == ECM_MOD_MPZ)
1259 : {
1260 135 : mpz_set_ui (modulus->temp1, BASE);
1261 135 : mpz_powm (R, modulus->temp1, EXP, modulus->orig_modulus);
1262 : }
1263 112 : else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN ||
1264 48 : modulus->repr == ECM_MOD_REDC)
1265 : {
1266 : size_t expidx;
1267 : mp_limb_t bitmask, expbits;
1268 :
1269 112 : expidx = mpz_size (EXP) - 1; /* point at most significant limb */
1270 112 : expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */
1271 112 : bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
1272 :
1273 : /* case EXP=0 */
1274 112 : if (mpz_sgn (EXP) == 0)
1275 : {
1276 0 : mpres_set_ui (R, 1UL, modulus); /* set result to 1 */
1277 : ASSERT_NORMALIZED (R);
1278 0 : return;
1279 : }
1280 :
1281 : ASSERT (mpz_size (EXP) > 0); /* probably redundant with _sgn() test */
1282 112 : expidx = mpz_size (EXP) - 1; /* point at most significant limb */
1283 112 : expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */
1284 : ASSERT (expbits != 0);
1285 :
1286 : /* Scan for the MSB in expbits */
1287 112 : bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1);
1288 3627 : for (; (bitmask & expbits) == 0; bitmask >>= 1);
1289 :
1290 : /* here the most significant limb with any set bits is in expbits, */
1291 : /* bitmask is set to mask in the msb of expbits */
1292 :
1293 112 : mpz_set_ui (modulus->temp2, BASE); /* temp2 = BASE */
1294 112 : if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1295 : {
1296 96 : mpz_mul_2exp (modulus->temp1, modulus->temp2, modulus->bits);
1297 96 : mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus);
1298 : }
1299 112 : bitmask >>= 1;
1300 :
1301 :
1302 : while (1)
1303 : {
1304 16178058 : for ( ; bitmask != 0; bitmask >>= 1)
1305 : {
1306 15929109 : mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus);
1307 :
1308 : /* If bit is 1, set temp2 = temp2 * BASE */
1309 15929109 : if (expbits & bitmask)
1310 : {
1311 7955730 : if (BASE == 2UL)
1312 : {
1313 26 : mpz_mul_2exp (modulus->temp2, modulus->temp2, 1);
1314 26 : if (mpz_cmp (modulus->temp2, modulus->orig_modulus) >= 0)
1315 12 : mpz_sub (modulus->temp2, modulus->temp2, modulus->orig_modulus);
1316 : }
1317 : else
1318 : {
1319 7955704 : mpz_mul_ui (modulus->temp1, modulus->temp2, BASE);
1320 7955704 : mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus);
1321 : }
1322 : }
1323 : }
1324 248949 : if (expidx == 0) /* if we just processed the least */
1325 112 : break; /* significant limb, we are done */
1326 248837 : expidx--;
1327 248837 : expbits = mpz_getlimbn (EXP, expidx);
1328 248837 : bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
1329 : }
1330 112 : mpz_set (R, modulus->temp2);
1331 :
1332 : /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|.
1333 : If EXP was negative, do a modular inverse */
1334 112 : if (mpz_sgn (EXP) < 0)
1335 : {
1336 0 : mpres_invert (R, R, modulus);
1337 : }
1338 : } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */
1339 : ASSERT_NORMALIZED (R);
1340 : }
1341 :
1342 : /* We use here the algorithm described in "Fast Modular Reduction" from
1343 : Hasenplaugh, Gaubatz and Gobal, Arith'18, 2007: assuming N has n limbs,
1344 : we have precomputed C = B^(n + ceil(n/2)) mod N. */
1345 : static void
1346 351006637 : mpres_mpz_mod (mpres_t R, mpz_t T, mpz_t N, mpz_t C)
1347 : {
1348 351006637 : size_t n = mpz_size (N);
1349 351006637 : size_t t = mpz_size (T);
1350 351006637 : size_t m = n + (n + 1) / 2; /* n + ceil(n/2) */
1351 :
1352 351006637 : if (t > m && n > 1) /* if n=1, then m=2, thus h=0 */
1353 : {
1354 87988003 : size_t c = mpz_size (C);
1355 : size_t h, l;
1356 : mp_ptr rp;
1357 87988003 : mp_ptr tp = PTR(T);
1358 :
1359 : /* Warning: we might have t > 2n. In that case we reduce
1360 : {tp+l+m, t-(m+l)} where l = t-2n. */
1361 87988003 : l = (t > 2 * n) ? t - 2 * n : 0;
1362 :
1363 87988003 : tp += l;
1364 87988003 : h = t - (m + l); /* since t-l <= 2n and m = n + ceil(n/2),
1365 : we have h <= n - ceil(n/2) = floor(n/2).
1366 : On the other hand, if l=0 we have h = t-m > 0;
1367 : if l>0, then l=t-2n, thus h=2n-m = floor(n/2) > 0
1368 : since n > 1. */
1369 87988003 : mpz_realloc (R, c + h);
1370 87988003 : rp = PTR(R);
1371 87988003 : if (c > h)
1372 87417036 : mpn_mul (rp, PTR(C), c, tp + m, h);
1373 : else
1374 570967 : mpn_mul (rp, tp + m, h, PTR(C), c);
1375 : /* now add {rp, c+h} to {tp, m}: we have c <= n and h <= n/2,
1376 : thus c + h <= m */
1377 87988003 : if (c + h > m) abort();
1378 87988003 : tp[m] = mpn_add (tp, tp, m, rp, c + h);
1379 87988003 : m += l + tp[m];
1380 87988003 : tp -= l; /* put back the low l limbs */
1381 87988003 : MPN_NORMALIZE(tp, m);
1382 87988003 : SIZ(T) = (SIZ(T) > 0) ? m : -m;
1383 : }
1384 :
1385 351006637 : mpz_mod (R, T, N);
1386 351006637 : }
1387 :
1388 : void
1389 6726466993 : mpres_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
1390 : {
1391 : ASSERT_NORMALIZED (S1);
1392 : ASSERT_NORMALIZED (S2);
1393 :
1394 : #ifdef WANT_ASSERT_EXPENSIVE
1395 : mpz_t test1, test2, test_result1, test_result2;
1396 : ASSERT_ALWAYS (S1 != modulus->temp1 && S2 != modulus->temp1 &&
1397 : R != modulus->temp1);
1398 : mpz_init (test1);
1399 : mpz_init (test2);
1400 : mpz_init (test_result1);
1401 : mpz_init (test_result2);
1402 : mpres_get_z (test1, S1, modulus);
1403 : mpres_get_z (test2, S2, modulus);
1404 : mpz_mul (test_result1, test1, test2);
1405 : mpz_mod (test_result1, test_result1, modulus->orig_modulus);
1406 : #endif
1407 :
1408 6726466993 : if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768))
1409 : {
1410 40743 : mp_size_t n = modulus->Fermat / GMP_NUMB_BITS;
1411 : unsigned long k;
1412 : mp_srcptr s1p, s2p;
1413 : mp_size_t s1s, s2s;
1414 :
1415 40743 : MPZ_REALLOC (R, n + 1);
1416 40743 : s1p = PTR(S1);
1417 40743 : s1s = SIZ(S1);
1418 40743 : s2p = PTR(S2);
1419 40743 : s2s = SIZ(S2);
1420 :
1421 40743 : k = mpn_fft_best_k (n, S1 == S2);
1422 : ASSERT(mpn_fft_next_size (n, k) == n);
1423 :
1424 40743 : if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus))
1425 : {
1426 6914 : s1p = PTR(modulus->temp1);
1427 6914 : s1s = SIZ(modulus->temp1);
1428 : }
1429 40743 : if (S1 == S2)
1430 : {
1431 13507 : s2p = s1p;
1432 13507 : s2s = s1s;
1433 : }
1434 27236 : else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus))
1435 : {
1436 3651 : s2p = PTR(modulus->temp2);
1437 3651 : s2s = SIZ(modulus->temp2);
1438 : }
1439 :
1440 : /* mpn_mul_fft() computes the product modulo B^n + 1, where
1441 : B = 2^(machine word size in bits). So the result can be = B^n,
1442 : in that case R is set to zero and 1 is returned as carry-out.
1443 : In all other cases 0 is returned. Hence the complete result is
1444 : R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */
1445 40743 : PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k);
1446 40743 : n ++;
1447 97294 : MPN_NORMALIZE(PTR(R), n);
1448 40743 : SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n;
1449 :
1450 40743 : return;
1451 : }
1452 :
1453 6726426250 : switch (modulus->repr)
1454 : {
1455 17069513 : case ECM_MOD_BASE2:
1456 17069513 : mpz_mul (modulus->temp1, S1, S2);
1457 17069513 : base2mod (R, modulus->temp1, modulus->temp1, modulus);
1458 17069513 : break;
1459 6234295496 : case ECM_MOD_MODMULN:
1460 6234295496 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1461 6234295496 : ecm_mulredc_basecase (R, S1, S2, modulus);
1462 6234295496 : break;
1463 231968646 : case ECM_MOD_REDC:
1464 231968646 : mpz_mul (modulus->temp1, S1, S2);
1465 231968646 : REDC (R, modulus->temp1, modulus->temp2, modulus);
1466 231968646 : break;
1467 243092595 : default: /* case ECM_MOD_MPZ */
1468 243092595 : mpz_mul (modulus->temp1, S1, S2);
1469 243092595 : mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus,
1470 243092595 : modulus->aux_modulus);
1471 243092595 : break;
1472 : }
1473 : ASSERT_NORMALIZED (R);
1474 :
1475 : #ifdef WANT_ASSERT_EXPENSIVE
1476 : mpres_get_z (test_result2, R, modulus);
1477 : if (mpz_cmp (test_result1, test_result2) != 0)
1478 : {
1479 : printf ("mpres_mul and mpz_mul/mpz_mod produced different results.\n");
1480 : gmp_printf ("input 1: %Zd\n", test1);
1481 : gmp_printf ("input 2: %Zd\n", test2);
1482 : gmp_printf ("mpres_mul: %Zd\n", test_result2);
1483 : gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1);
1484 : abort ();
1485 : }
1486 : mpz_clear (test1);
1487 : mpz_clear (test2);
1488 : mpz_clear (test_result1);
1489 : mpz_clear (test_result2);
1490 : #endif
1491 : }
1492 :
1493 : /* R <- S1^2 mod modulus */
1494 : void
1495 1632909584 : mpres_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus)
1496 : {
1497 : ASSERT_NORMALIZED (S1);
1498 :
1499 : #ifdef WANT_ASSERT_EXPENSIVE
1500 : mpz_t test1, test_result1, test_result2;
1501 : ASSERT_ALWAYS (S1 != modulus->temp1 && R != modulus->temp1);
1502 : mpz_init (test1);
1503 : mpz_init (test_result1);
1504 : mpz_init (test_result2);
1505 : mpres_get_z (test1, S1, modulus);
1506 : mpz_mul (test_result1, test1, test1);
1507 : mpz_mod (test_result1, test_result1, modulus->orig_modulus);
1508 : #endif
1509 :
1510 1632909584 : if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768))
1511 : {
1512 13507 : mpres_mul (R, S1, S1, modulus);
1513 13507 : return;
1514 : }
1515 :
1516 1632896077 : switch (modulus->repr)
1517 : {
1518 7435109 : case ECM_MOD_BASE2:
1519 7435109 : mpz_mul (modulus->temp1, S1, S1);
1520 7435109 : base2mod (R, modulus->temp1, modulus->temp1, modulus);
1521 7435109 : break;
1522 1409692627 : case ECM_MOD_MODMULN:
1523 1409692627 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1524 1409692627 : ecm_sqrredc_basecase (R, S1, modulus);
1525 1409692627 : break;
1526 107854299 : case ECM_MOD_REDC:
1527 107854299 : mpz_mul (modulus->temp1, S1, S1);
1528 107854299 : REDC (R, modulus->temp1, modulus->temp2, modulus);
1529 107854299 : break;
1530 107914042 : default: /* case ECM_MOD_MPZ */
1531 107914042 : mpz_mul (modulus->temp1, S1, S1);
1532 107914042 : mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus,
1533 107914042 : modulus->aux_modulus);
1534 107914042 : break;
1535 : }
1536 : ASSERT_NORMALIZED (R);
1537 :
1538 : #ifdef WANT_ASSERT_EXPENSIVE
1539 : mpres_get_z (test_result2, R, modulus);
1540 : if (mpz_cmp (test_result1, test_result2) != 0)
1541 : {
1542 : printf ("mpres_sqr and mpz_mul/mpz_mod produced different results.\n");
1543 : gmp_printf ("input 1: %Zd\n", test1);
1544 : gmp_printf ("mpres_mul: %Zd\n", test_result2);
1545 : gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1);
1546 : abort ();
1547 : }
1548 : mpz_clear (test1);
1549 : mpz_clear (test_result1);
1550 : mpz_clear (test_result2);
1551 : #endif
1552 : }
1553 :
1554 : /* R <- S * n mod modulus */
1555 : void
1556 9715225 : mpres_mul_ui (mpres_t R, const mpres_t S, const unsigned long n,
1557 : mpmod_t modulus)
1558 : {
1559 : ASSERT_NORMALIZED (S);
1560 9715225 : mpz_mul_ui (modulus->temp1, S, n);
1561 : /* This is the same for all methods: just reduce with original modulus */
1562 9715225 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1563 : ASSERT_NORMALIZED (R);
1564 9715225 : }
1565 :
1566 : /* R <- S * 2^k mod modulus */
1567 : void
1568 152 : mpres_mul_2exp (mpres_t R, const mpres_t S, const unsigned long k,
1569 : mpmod_t modulus)
1570 : {
1571 : ASSERT_NORMALIZED (S);
1572 152 : mpz_mul_2exp (modulus->temp1, S, k);
1573 : /* This is the same for all methods: just reduce with original modulus */
1574 152 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1575 : ASSERT_NORMALIZED (R);
1576 152 : }
1577 :
1578 : /* This function multiplies an integer in mpres_t form with an integer in
1579 : mpz_t form, and stores the output in mpz_t form. The advantage is that
1580 : one REDC suffices to reduce the product and convert it to non-Montgomery
1581 : representation. */
1582 :
1583 : void
1584 12896712 : mpres_mul_z_to_z (mpz_t R, const mpres_t S1, const mpz_t S2, mpmod_t modulus)
1585 : {
1586 : ASSERT_NORMALIZED (S1);
1587 :
1588 12896712 : if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)
1589 : {
1590 37 : mp_size_t n = modulus->Fermat / GMP_NUMB_BITS;
1591 : unsigned long k;
1592 37 : mp_srcptr s1p = PTR(S1), s2p = PTR(S2);
1593 37 : mp_size_t s1s = SIZ(S1), s2s = SIZ(S2);
1594 :
1595 37 : MPZ_REALLOC (R, n + 1);
1596 37 : k = mpn_fft_best_k (n, S1 == S2);
1597 : ASSERT(mpn_fft_next_size (n, k) == n);
1598 :
1599 37 : if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus))
1600 : {
1601 0 : s1p = PTR(modulus->temp1);
1602 0 : s1s = SIZ(modulus->temp1);
1603 : }
1604 37 : if (S1 == S2)
1605 : {
1606 0 : s2p = s1p;
1607 0 : s2s = s1s;
1608 : }
1609 37 : else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus))
1610 : {
1611 0 : s2p = PTR(modulus->temp2);
1612 0 : s2s = SIZ(modulus->temp2);
1613 : }
1614 :
1615 : /* mpn_mul_fft() computes the product modulo B^n + 1, where
1616 : B = 2^(machine word size in bits). So the result can be = B^n,
1617 : in that case R is set to zero and 1 is returned as carry-out.
1618 : In all other cases 0 is returned. Hence the complete result is
1619 : R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */
1620 37 : PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k);
1621 37 : n ++;
1622 1607 : MPN_NORMALIZE(PTR(R), n);
1623 37 : SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n;
1624 37 : mpz_mod (R, R, modulus->orig_modulus);
1625 :
1626 37 : return;
1627 : }
1628 :
1629 12896675 : switch (modulus->repr)
1630 : {
1631 23965 : case ECM_MOD_BASE2:
1632 23965 : if (mpz_sizeinbase (S2, 2) > (unsigned) abs (modulus->bits))
1633 : {
1634 1890 : base2mod (modulus->temp2, S2, modulus->temp1, modulus);
1635 1890 : mpz_mul (modulus->temp1, S1, modulus->temp2);
1636 : }
1637 : else
1638 22075 : mpz_mul (modulus->temp1, S1, S2);
1639 23965 : base2mod (R, modulus->temp1, modulus->temp1, modulus);
1640 23965 : mpz_mod (R, R, modulus->orig_modulus);
1641 23965 : break;
1642 9126249 : case ECM_MOD_MODMULN:
1643 9126249 : if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
1644 : {
1645 34576 : mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
1646 34576 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1647 34576 : ecm_mulredc_basecase (R, S1, modulus->temp2, modulus);
1648 34576 : mpz_mod (R, R, modulus->orig_modulus);
1649 : }
1650 : else
1651 : {
1652 9091673 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1653 9091673 : if (ABSIZ(S2) < modulus->bits / GMP_NUMB_BITS)
1654 : {
1655 : mpz_t t;
1656 384861 : mpz_init2 (t, modulus->bits);
1657 384861 : mpz_set (t, S2);
1658 384861 : ecm_mulredc_basecase (R, S1, t, modulus);
1659 384861 : mpz_clear (t);
1660 : }
1661 : else
1662 8706812 : ecm_mulredc_basecase (R, S1, S2, modulus);
1663 9091673 : mpz_mod (R, R, modulus->orig_modulus);
1664 : }
1665 9126249 : break;
1666 1024371 : case ECM_MOD_REDC:
1667 1024371 : if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
1668 : {
1669 2220 : mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
1670 2220 : mpz_mul (modulus->temp1, S1, modulus->temp2);
1671 : }
1672 : else
1673 1022151 : mpz_mul (modulus->temp1, S1, S2);
1674 1024371 : REDC (R, modulus->temp1, modulus->temp2, modulus);
1675 1024371 : mpz_mod (R, R, modulus->orig_modulus);
1676 1024371 : break;
1677 2722090 : default:
1678 2722090 : if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
1679 : {
1680 73585 : mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
1681 73585 : mpz_mul (modulus->temp1, S1, modulus->temp2);
1682 : }
1683 : else
1684 2648505 : mpz_mul (modulus->temp1, S1, S2);
1685 2722090 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1686 2722090 : break;
1687 : }
1688 : ASSERT_NORMALIZED (R);
1689 : }
1690 :
1691 :
1692 : /* Sets R = S * c, for some constant c that is coprime to modulus.
1693 : This is primarily useful for multiplying numbers together for a gcd with
1694 : modulus. The advantage is that we don't need to convert the mpz_t
1695 : to Montgomery representation before applying REDC. */
1696 :
1697 : void
1698 6385993 : mpres_set_z_for_gcd (mpres_t R, const mpz_t S, mpmod_t modulus)
1699 : {
1700 6385993 : mpz_mod (R, S, modulus->orig_modulus);
1701 : ASSERT_NORMALIZED (R);
1702 6385993 : }
1703 :
1704 : /* Companion function to mpres_set_z_for_gcd(). It divides by c^n.
1705 : In case of MULREDC with k b-bit words, c = 1/2^(b*k), so we multiply
1706 : by 2^(n*b*k). The purpose is to fix products of n terms collected by
1707 : using mpres_set_z_for_gcd(), so that we can still get the exact residue. */
1708 :
1709 : void
1710 596 : mpres_set_z_for_gcd_fix (mpres_t R, const mpres_t S, const mpz_t n, mpmod_t modulus)
1711 : {
1712 596 : switch (modulus->repr)
1713 : {
1714 372 : case ECM_MOD_MODMULN:
1715 : case ECM_MOD_REDC:
1716 : {
1717 : mpres_t po2;
1718 : mpz_t nb;
1719 :
1720 372 : mpz_init (nb);
1721 372 : mpres_init (po2, modulus);
1722 :
1723 : /* Note: modulus->bits is always non-negative for MODMULN and REDC */
1724 372 : mpz_mul_ui (nb, n, modulus->bits);
1725 372 : mpres_set_ui (po2, 2, modulus);
1726 372 : mpres_pow (po2, po2, nb, modulus);
1727 372 : mpres_mul (R, S, po2, modulus);
1728 :
1729 372 : mpz_clear (nb);
1730 372 : mpres_clear (po2, modulus);
1731 : }
1732 : }
1733 596 : }
1734 :
1735 :
1736 : /* R <- S / 2^n mod modulus. Does not need to be fast. */
1737 : void
1738 12851 : mpres_div_2exp (mpres_t R, const mpres_t S, const unsigned int n,
1739 : mpmod_t modulus)
1740 : {
1741 : int i;
1742 : ASSERT_NORMALIZED (S);
1743 :
1744 12851 : if (n == 0)
1745 : {
1746 0 : mpres_set (R, S, modulus);
1747 : ASSERT_NORMALIZED (R);
1748 0 : return;
1749 : }
1750 :
1751 12851 : if (mpz_odd_p (S))
1752 : {
1753 : ASSERT (mpz_odd_p (modulus->orig_modulus));
1754 4590 : mpz_add (R, S, modulus->orig_modulus);
1755 4590 : mpz_tdiv_q_2exp (R, R, 1);
1756 : }
1757 : else
1758 8261 : mpz_tdiv_q_2exp (R, S, 1);
1759 :
1760 20426 : for (i = n ; i > 1; i--)
1761 : {
1762 7575 : if (mpz_odd_p (R))
1763 : {
1764 : ASSERT (mpz_odd_p (modulus->orig_modulus));
1765 2298 : mpz_add (R, R, modulus->orig_modulus);
1766 : }
1767 7575 : mpz_tdiv_q_2exp (R, R, 1);
1768 : }
1769 :
1770 : ASSERT_NORMALIZED (R);
1771 : }
1772 :
1773 : void
1774 2560 : mpres_add_ui (mpres_t R, const mpres_t S, const unsigned long n,
1775 : mpmod_t modulus)
1776 : {
1777 : ASSERT_NORMALIZED (S);
1778 2560 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1779 : {
1780 569 : mpz_add_ui (R, S, n);
1781 569 : if (mpz_cmp (R, modulus->orig_modulus) > 0)
1782 38 : mpz_sub (R, R, modulus->orig_modulus); /* This assumes modulus >= n */
1783 : }
1784 1991 : else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1785 : {
1786 1991 : mpz_set_ui (modulus->temp1, n);
1787 1991 : mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
1788 1991 : mpz_add (modulus->temp1, modulus->temp1, S);
1789 1991 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1790 : }
1791 : ASSERT_NORMALIZED (R);
1792 2560 : }
1793 :
1794 : /* R <- S1 + S2 mod modulus */
1795 : void
1796 1549778592 : mpres_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
1797 : {
1798 : ASSERT_NORMALIZED (S1);
1799 : ASSERT_NORMALIZED (S2);
1800 1549778592 : mpz_add (R, S1, S2);
1801 1549778592 : if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) &&
1802 1385479182 : ABSIZ(R) > ABSIZ(modulus->orig_modulus))
1803 : {
1804 28701423 : if (SIZ(R) > 0)
1805 24994613 : mpz_sub (R, R, modulus->multiple);
1806 : else
1807 3706810 : mpz_add (R, R, modulus->multiple);
1808 : /* N <= since multiple < 2^Nbits + N, now |R| < B */
1809 : }
1810 : ASSERT_NORMALIZED (R);
1811 1549778592 : }
1812 :
1813 : /* R <- S - n mod modulus
1814 : If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to
1815 : Montgomery representation before substracting
1816 : */
1817 : void
1818 535040529 : mpres_sub_ui (mpres_t R, const mpres_t S, const unsigned long n,
1819 : mpmod_t modulus)
1820 : {
1821 : ASSERT_NORMALIZED (S);
1822 535040529 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1823 : {
1824 2056727 : mpz_sub_ui (R, S, n);
1825 2056727 : if (mpz_sgn (R) < 0)
1826 168 : mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */
1827 : }
1828 532983802 : else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1829 : {
1830 532983802 : mpz_set_ui (modulus->temp1, n);
1831 532983802 : mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
1832 532983802 : mpz_sub (modulus->temp1, S, modulus->temp1);
1833 532983802 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1834 : }
1835 : ASSERT_NORMALIZED (R);
1836 535040529 : }
1837 :
1838 : #if 0
1839 : /* R <- n - S mod modulus
1840 : If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to
1841 : Montgomery representation before substracting
1842 : */
1843 : void
1844 : mpres_ui_sub (mpres_t R, const unsigned long n ,const mpres_t S,
1845 : mpmod_t modulus)
1846 : {
1847 : ASSERT_NORMALIZED (S);
1848 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1849 : {
1850 : mpz_ui_sub (R, n, S);
1851 : if (mpz_sgn (R) < 0)
1852 : mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */
1853 : }
1854 : else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1855 : {
1856 : mpz_set_ui (modulus->temp1, n);
1857 : mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
1858 : mpz_sub (modulus->temp1, modulus->temp1, S);
1859 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1860 : }
1861 : ASSERT_NORMALIZED (R);
1862 : }
1863 : #endif
1864 :
1865 : /* R <- S1 - S2 mod modulus */
1866 : void
1867 6255342193 : mpres_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
1868 : {
1869 : ASSERT_NORMALIZED (S1);
1870 : ASSERT_NORMALIZED (S2);
1871 6255342193 : mpz_sub (R, S1, S2);
1872 6255342193 : if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) &&
1873 6062129372 : ABSIZ(R) > ABSIZ(modulus->orig_modulus))
1874 : {
1875 87388492 : if (SIZ(R) > 0)
1876 44990659 : mpz_sub (R, R, modulus->multiple);
1877 : else
1878 42397833 : mpz_add (R, R, modulus->multiple);
1879 : /* N <= since multiple < 2^Nbits + N, now |R| < B */
1880 : }
1881 : ASSERT_NORMALIZED (R);
1882 6255342193 : }
1883 :
1884 : void
1885 11974 : mpres_set_z (mpres_t R, const mpz_t S, mpmod_t modulus)
1886 : {
1887 11974 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1888 2282 : mpz_mod (R, S, modulus->orig_modulus);
1889 9692 : else if (modulus->repr == ECM_MOD_MODMULN)
1890 : {
1891 8594 : mpz_mod (modulus->temp2, S, modulus->orig_modulus);
1892 8594 : ecm_mulredc_basecase (R, modulus->temp2, modulus->R2, modulus);
1893 : }
1894 1098 : else if (modulus->repr == ECM_MOD_REDC)
1895 : {
1896 1098 : mpz_mod (modulus->temp2, S, modulus->orig_modulus);
1897 1098 : mpz_mul (modulus->temp1, modulus->temp2, modulus->R2);
1898 1098 : REDC (R, modulus->temp1, modulus->temp2, modulus);
1899 : }
1900 : ASSERT_NORMALIZED (R);
1901 11974 : }
1902 :
1903 : /* R and S must not be modulus->temp1 */
1904 : void
1905 16723361 : mpres_get_z (mpz_t R, const mpres_t S, mpmod_t modulus)
1906 : {
1907 : ASSERT_NORMALIZED (S);
1908 16723361 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1909 : {
1910 990449 : mpz_mod (R, S, modulus->orig_modulus);
1911 : }
1912 15732912 : else if (modulus->repr == ECM_MOD_MODMULN)
1913 : {
1914 15346253 : mpz_set (modulus->temp1, S);
1915 15346253 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
1916 15346253 : ecm_redc_basecase (R, modulus->temp1, modulus);
1917 15346253 : mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */
1918 : }
1919 386659 : else if (modulus->repr == ECM_MOD_REDC)
1920 : {
1921 386659 : REDC (R, S, modulus->temp1, modulus);
1922 386659 : mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */
1923 : }
1924 : #ifdef DEBUG
1925 : else
1926 : {
1927 : fprintf (ECM_STDERR, "mpres_get_z: Unexpected representation %d\n",
1928 : modulus->repr);
1929 : exit (EXIT_FAILURE);
1930 : }
1931 : #endif
1932 16723361 : }
1933 :
1934 : /* R <- n mod modulus
1935 : If repr==ECM_MOD_MPZ or ECM_MOD_BASE2, we convert n to
1936 : Montgomery representation
1937 : */
1938 : void
1939 287759 : mpres_set_ui (mpres_t R, const unsigned long n, mpmod_t modulus)
1940 : {
1941 287759 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1942 : {
1943 34058 : mpz_set_ui (R, n);
1944 34058 : mpz_mod (R, R, modulus->orig_modulus);
1945 : }
1946 253701 : else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1947 : {
1948 253701 : mpz_set_ui (modulus->temp1, n);
1949 253701 : mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
1950 253701 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1951 : }
1952 : ASSERT_NORMALIZED (R);
1953 287759 : }
1954 :
1955 : /* same as previous but with signed long */
1956 : void
1957 86 : mpres_set_si (mpres_t R, const long n, mpmod_t modulus)
1958 : {
1959 86 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
1960 : {
1961 3 : mpz_set_si (R, n);
1962 3 : mpz_mod (R, R, modulus->orig_modulus);
1963 : }
1964 83 : else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
1965 : {
1966 83 : mpz_set_si (modulus->temp1, n);
1967 83 : mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
1968 83 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
1969 : }
1970 : ASSERT_NORMALIZED (R);
1971 86 : }
1972 :
1973 : /* R <- -S mod modulus. Does not need to be efficient. */
1974 : void
1975 1038623 : mpres_neg (mpres_t R, const mpres_t S, ATTRIBUTE_UNUSED mpmod_t modulus)
1976 : {
1977 : ASSERT_NORMALIZED (S);
1978 1038623 : mpz_neg (R, S);
1979 : ASSERT_NORMALIZED (R);
1980 1038623 : }
1981 :
1982 : /* Returns non-zero if inversion succeeded, and zero if not */
1983 :
1984 : int
1985 2779612 : mpres_invert (mpres_t R, const mpres_t S, mpmod_t modulus)
1986 : {
1987 : #ifdef WANT_ASSERT_EXPENSIVE
1988 : mpres_t test;
1989 : mpz_t test_result;
1990 : mpres_init (test, modulus);
1991 : mpres_set (test, S, modulus);
1992 : #endif
1993 :
1994 : ASSERT_NORMALIZED (S);
1995 :
1996 2779612 : if (mpz_invert (modulus->temp2, S, modulus->orig_modulus) == 0)
1997 409 : return 0;
1998 :
1999 2779203 : if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
2000 : {
2001 284724 : mpz_set (R, modulus->temp2);
2002 284724 : ASSERT_NORMALIZED (R);
2003 : }
2004 2494479 : else if (modulus->repr == ECM_MOD_MODMULN)
2005 : {
2006 2227086 : ecm_mulredc_basecase (R, modulus->temp2, modulus->R3, modulus);
2007 : ASSERT_NORMALIZED (R);
2008 : }
2009 267393 : else if (modulus->repr == ECM_MOD_REDC)
2010 : {
2011 : MPZ_NORMALIZED (S);
2012 267393 : mpz_mul (modulus->temp1, modulus->temp2, modulus->R3);
2013 267393 : REDC (R, modulus->temp1, modulus->temp2, modulus);
2014 : ASSERT_NORMALIZED (R);
2015 : }
2016 : #ifdef DEBUG
2017 : else
2018 : {
2019 : fprintf (ECM_STDERR, "mpres_invert: Unexpected representation %d\n",
2020 : modulus->repr);
2021 : exit (EXIT_FAILURE);
2022 : }
2023 : #endif
2024 :
2025 : #ifdef WANT_ASSERT_EXPENSIVE
2026 : mpres_mul (test, test, R, modulus);
2027 : mpz_init (test_result);
2028 : mpres_get_z (test_result, test, modulus);
2029 : ASSERT_ALWAYS(mpz_cmp_ui (test_result, 1UL) == 0);
2030 : mpz_clear (test_result);
2031 : mpres_clear (test, modulus);
2032 : #endif
2033 :
2034 2779203 : return 1;
2035 : }
2036 :
2037 : void
2038 259287 : mpres_gcd (mpz_t R, const mpres_t S, const mpmod_t modulus)
2039 : {
2040 : /* In MODMULN and REDC form, M(x) = x*R with gcd(R, modulus) = 1 .
2041 : Therefore gcd(M(x), modulus) = gcd(x, modulus) and we need not bother
2042 : to convert out of Montgomery form. */
2043 : ASSERT_NORMALIZED (S);
2044 259287 : mpz_gcd (R, S, modulus->orig_modulus);
2045 259287 : }
2046 :
2047 : #ifdef HAVE_ADDLAWS
2048 : /* Returns non-zero if the two residues are equal,
2049 : and zero if they are not */
2050 : int
2051 1078060 : mpres_equal (const mpres_t S1, const mpres_t S2, mpmod_t modulus)
2052 : {
2053 1078060 : mpz_mod (modulus->temp1, S1, modulus->orig_modulus);
2054 1078060 : mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
2055 1078060 : return (mpz_cmp (modulus->temp1, modulus->temp2) == 0);
2056 : }
2057 : #endif
2058 :
2059 : #if 0 /* those routines are not called in normal operation */
2060 :
2061 : void
2062 : mpres_out_str (FILE *fd, const unsigned int base, const mpres_t S,
2063 : mpmod_t modulus)
2064 : {
2065 : mpres_get_z (modulus->temp2, S, modulus);
2066 : mpz_out_str (fd, base, modulus->temp2);
2067 : }
2068 :
2069 : /* Multiplies S1 by the one-limb integer S2, and does modulo reduction.
2070 : The modulo reduction may imply multiplication of the residue class
2071 : by some constant, since we may not do the correct number of REDC
2072 : reduction passes and so fail to divide by the correct power of 2 for
2073 : Montgomery representation. The constant is the same for each call
2074 : of this function with a given modulus, however. */
2075 : static void
2076 : ecm_mulredc_1_basecase (mpres_t R, const mpres_t S1, const mp_limb_t S2,
2077 : mpmod_t modulus)
2078 : {
2079 : mp_ptr s1p;
2080 : mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
2081 :
2082 : ASSERT(ALLOC(R) >= nn);
2083 : ASSERT(ALLOC(S1) >= nn);
2084 : s1p = PTR(S1);
2085 : for (j = ABSIZ(S1); j < nn; j++)
2086 : s1p[j] = 0;
2087 :
2088 : #ifdef HAVE_NATIVE_MULREDC1_N
2089 : if (nn < 20)
2090 : {
2091 : mp_ptr rp = PTR(R);
2092 : mulredc_1(rp, S2, s1p, PTR(modulus->orig_modulus), nn,
2093 : modulus->Nprim[0]);
2094 : MPN_NORMALIZE (rp, nn);
2095 : SIZ(R) = (SIZ(S1)) < 0 ? (int) -nn : (int) nn;
2096 : }
2097 : else
2098 : #endif
2099 : {
2100 : /* FIXME, we can do much better than this */
2101 : mpz_mul_ui (modulus->temp1, S1, S2);
2102 : mpz_mod(R, modulus->temp1, modulus->orig_modulus);
2103 : }
2104 : }
2105 :
2106 : /* Multiplies S by n and possibly divides by some constant.
2107 : Whether or not it divides depends on the modulus representation and
2108 : the modulus size. */
2109 : void
2110 : mpres_muldivbysomething_si (mpres_t R, const mpres_t S, const long n,
2111 : mpmod_t modulus)
2112 : {
2113 : ASSERT_NORMALIZED (S);
2114 : if (modulus->repr == ECM_MOD_MODMULN &&
2115 : modulus->bits / GMP_NUMB_BITS <= 20)
2116 : /* FIXME: is the 20 here the same constant as in mulredc1_20?
2117 : If so, it should be changed into a macro. */
2118 : {
2119 : MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
2120 : if (n < 0)
2121 : {
2122 : ecm_mulredc_1_basecase (R, S, (mp_limb_t) -n, modulus);
2123 : mpres_neg (R, R, modulus);
2124 : }
2125 : else
2126 : {
2127 : ecm_mulredc_1_basecase (R, S, (mp_limb_t) n, modulus);
2128 : }
2129 : }
2130 : else
2131 : {
2132 : mpz_mul_si (modulus->temp1, S, n);
2133 : /* This is the same for all methods: just reduce with original modulus */
2134 : mpz_mod (R, modulus->temp1, modulus->orig_modulus);
2135 : }
2136 : ASSERT_NORMALIZED (R);
2137 : }
2138 :
2139 : /* Returns 1 if successful, 0 if not */
2140 : static int
2141 : test_mpres_set_z_for_gcd_fix (const int maxk, mpmod_t modulus)
2142 : {
2143 : mpres_t m, prod;
2144 : mpz_t n;
2145 : int k;
2146 :
2147 : mpres_init (prod, modulus);
2148 : mpres_init (m, modulus);
2149 : mpz_init (n);
2150 :
2151 : /* m = 1 * c, where c is a constant depending on mod reduction method */
2152 : mpz_set_ui (n, 1);
2153 : mpres_set_z_for_gcd (m, n, modulus);
2154 :
2155 : for (k = 0; k <= maxk; k++)
2156 : {
2157 : int i;
2158 : mpres_set_ui (prod, 1, modulus);
2159 :
2160 : /* Compute prod = 1 * (1 * c)^k */
2161 : for (i = 0; i < k; i++)
2162 : mpres_mul (prod, prod, m, modulus);
2163 :
2164 : /* Divide prod by c^k */
2165 : mpz_set_ui (n, k);
2166 : mpres_set_z_for_gcd_fix (prod, prod, n, modulus);
2167 :
2168 : /* Result should be 1 */
2169 : mpres_get_z (n, prod, modulus);
2170 : if (mpz_cmp_ui (n, 1) != 0) {
2171 : gmp_printf ("Error: test of mpres_set_z_for_gcd_fix() failed, k = %d, n = %Zd\n", k, n);
2172 : return 0;
2173 : }
2174 : }
2175 :
2176 : mpz_clear (n);
2177 : mpres_clear (m, modulus);
2178 : mpres_clear (prod, modulus);
2179 : return 1;
2180 : }
2181 :
2182 : int
2183 : mpmod_selftest (const mpz_t n)
2184 : {
2185 : mpres_t test1, test2;
2186 : mpmod_t modulus;
2187 :
2188 : printf ("Performing self test of modular arithmetic\n");
2189 : mpmod_init (modulus, n, 0);
2190 : mpres_init (test1, modulus);
2191 : mpres_init (test2, modulus);
2192 : mpres_set_ui (test1, 2, modulus);
2193 : mpres_set_ui (test2, 5, modulus);
2194 : mpres_muldivbysomething_si (test1, test1, 5, modulus);
2195 : mpres_muldivbysomething_si (test2, test2, 2, modulus);
2196 : if (!mpres_equal (test1, test2, modulus))
2197 : {
2198 : printf ("mpres_muldivbysomething_si() wrong\n");
2199 : fflush (stdout);
2200 : abort();
2201 : }
2202 : mpres_clear (test1, modulus);
2203 : mpres_clear (test2, modulus);
2204 :
2205 : if (!test_mpres_set_z_for_gcd_fix(10, modulus))
2206 : abort();
2207 :
2208 : mpmod_clear (modulus);
2209 :
2210 : return 0;
2211 : }
2212 : #endif /* #if 0 */
2213 :
2214 : /****************************************************/
2215 : /* mpresn: modular arithmetic based directly on mpn */
2216 : /****************************************************/
2217 :
2218 : /* We use here a signed word-based redundant representation.
2219 :
2220 : In case N < B^n/16 (since for redc where we add to the absolute value of
2221 : the residue), where n is the number of limbs of N in base B (2^32 or 2^64
2222 : usually), we can prove there is no adjustment (adding or subtracting N),
2223 : cf http://www.loria.fr/~zimmerma/papers/norm.pdf.
2224 :
2225 : However current branch predictors are quite good, thus we prefer to keep
2226 : the tests and to allow any input N (instead of only N < B^n/16).
2227 : */
2228 :
2229 : /* ensure R has allocated space for at least n limbs,
2230 : and if less than n limbs are used, pad with zeros,
2231 : and set SIZ(R) to n if positive or -n if negative */
2232 : void
2233 656 : mpresn_pad (mpres_t R, mpmod_t N)
2234 : {
2235 656 : mp_size_t n = ABSIZ(N->orig_modulus);
2236 : mp_size_t rn;
2237 :
2238 656 : _mpz_realloc (R, n);
2239 656 : rn = mpz_size (R);
2240 656 : ASSERT_ALWAYS (rn <= n);
2241 656 : if (rn < n)
2242 : {
2243 768 : MPN_ZERO (PTR(R) + rn, n - rn);
2244 256 : SIZ(R) = SIZ(R) >= 0 ? n : -n;
2245 : }
2246 656 : }
2247 :
2248 : void
2249 304 : mpresn_unpad (mpres_t R)
2250 : {
2251 304 : mp_size_t n = ABSIZ(R);
2252 :
2253 332 : while (n > 0 && PTR(R)[n-1] == 0)
2254 28 : n--;
2255 304 : SIZ(R) = SIZ(R) >= 0 ? n : -n;
2256 304 : }
2257 :
2258 : /* R <- S1 * S1 mod N, used only for ECM_MOD_MODMULN */
2259 : void
2260 54413472 : mpresn_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus)
2261 : {
2262 54413472 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2263 :
2264 : ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
2265 :
2266 54413472 : ecm_sqrredc_basecase_n (PTR(R), PTR(S1), PTR(modulus->orig_modulus),
2267 : n, modulus->Nprim, PTR(modulus->temp1));
2268 :
2269 54413472 : SIZ(R) = n;
2270 54413472 : }
2271 :
2272 : /* R <- S1 * S2 mod N, used only for ECM_MOD_MODMULN */
2273 : void
2274 55292472 : mpresn_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
2275 : {
2276 55292472 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2277 :
2278 : ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
2279 : ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
2280 :
2281 55292472 : ecm_mulredc_basecase_n (PTR(R), PTR(S1), PTR(S2), PTR(modulus->orig_modulus),
2282 : n, modulus->Nprim, PTR(modulus->temp1));
2283 :
2284 55292472 : SIZ(R) = SIZ(S1) == SIZ(S2) ? n : -n;
2285 55292472 : }
2286 :
2287 : /* R <- S*m/B mod modulus where m fits in a mp_limb_t.
2288 : Here S (w in dup_add_batch1) is the result of a subtraction,
2289 : thus with the notations from http://www.loria.fr/~zimmerma/papers/norm.pdf
2290 : we have S < 2 \alpha N.
2291 : Then R < (2 \alpha N \beta + \beta N) = (2 \alpha + 1) N.
2292 : This result R is used in an addition with u being the result of a squaring
2293 : thus u < \alpha N, which gives a result < (3 \alpha + 1) N.
2294 : Finally this result is used in a multiplication with another operand less
2295 : than 2 \alpha N, thus we want:
2296 : ((2 \alpha) (3 \alpha + 1) N^2 + \beta N)/\beta \leq \alpha N, i.e.,
2297 : 2 \alpha (3 \alpha + 1) \varepsilon + 1 \leq \alpha
2298 : This implies \varepsilon \leq 7/2 - sqrt(3)/2 ~ 0.0359, in which case
2299 : we can take \alpha = 2/3*sqrt(3)+1 ~ 2.1547.
2300 : In that case no adjustment is needed in mpresn_mul_1.
2301 : However we prefer to keep the adjustment here, to allow a larger set of
2302 : inputs (\varepsilon \leq 1/16 = 0.0625 instead of 0.0359).
2303 : */
2304 : void
2305 12724368 : mpresn_mul_1 (mpres_t R, const mpres_t S, const mp_limb_t m, mpmod_t modulus)
2306 : {
2307 12724368 : mp_ptr t1 = PTR(modulus->temp1);
2308 12724368 : mp_ptr t2 = PTR(modulus->temp2);
2309 12724368 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2310 : mp_limb_t q;
2311 :
2312 : ASSERT (SIZ(S) == n || -SIZ(S) == n);
2313 : ASSERT (ALLOC(modulus->temp1) >= n+1);
2314 :
2315 : #if defined(USE_ASM_REDC) && defined(HAVE_NATIVE_MULREDC1_N)
2316 12724368 : if (n <= MULREDC_ASSEMBLY_MAX)
2317 12724368 : mulredc_1 (PTR(R), m, PTR(S), PTR(modulus->orig_modulus), n,
2318 12724368 : modulus->Nprim[0]);
2319 : else
2320 : #endif
2321 : {
2322 0 : t1[n] = mpn_mul_1 (t1, PTR(S), n, m);
2323 0 : q = t1[0] * modulus->Nprim[0];
2324 0 : t2[n] = mpn_mul_1 (t2, PTR(modulus->orig_modulus), n, q);
2325 : #ifdef HAVE___GMPN_ADD_NC
2326 0 : q = __gmpn_add_nc (PTR(R), t1 + 1, t2 + 1, n, t1[0] != 0);
2327 : #else
2328 : q = mpn_add_n (PTR(R), t1 + 1, t2 + 1, n);
2329 : q += mpn_add_1 (PTR(R), PTR(R), n, t1[0] != 0);
2330 : #endif
2331 0 : while (q != 0)
2332 0 : q -= mpn_sub_n (PTR(R), PTR(R), PTR(modulus->orig_modulus), n);
2333 : }
2334 :
2335 12724368 : SIZ(R) = SIZ(S); /* sign is unchanged */
2336 12724368 : }
2337 :
2338 : /* R <- S1 + S2 mod modulus */
2339 : /* we assume all numbers are allocated to n limbs, and unused most significant
2340 : limbs are set to zero */
2341 : void
2342 27206736 : mpresn_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
2343 : {
2344 27206736 : mp_ptr r = PTR(R);
2345 27206736 : mp_ptr s1 = PTR(S1);
2346 27206736 : mp_ptr s2 = PTR(S2);
2347 27206736 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2348 : ATTRIBUTE_UNUSED mp_limb_t cy;
2349 :
2350 : ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
2351 : ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
2352 :
2353 27206736 : if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */
2354 : {
2355 20402073 : cy = mpn_add_n (r, s1, s2, n);
2356 : /* for N < B^n/16, the while loop will be never performed, which proves
2357 : it will be performed a small number of times. In practice we
2358 : observed up to 7 loops, but it happens rarely. */
2359 : #ifndef MPRESN_NO_ADJUSTMENT
2360 20741025 : while (cy != 0)
2361 338952 : cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
2362 : #endif
2363 20402073 : SIZ(R) = SIZ(S1);
2364 : }
2365 : else /* different signs */
2366 : {
2367 6804663 : if (mpn_cmp (s1, s2, n) >= 0)
2368 : {
2369 4485656 : mpn_sub_n (r, s1, s2, n); /* no borrow here */
2370 4485656 : SIZ(R) = SIZ(S1);
2371 : }
2372 : else
2373 : {
2374 2319007 : mpn_sub_n (r, s2, s1, n); /* idem */
2375 2319007 : SIZ(R) = SIZ(S2);
2376 : }
2377 : }
2378 27206736 : }
2379 :
2380 : void
2381 13603368 : mpresn_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
2382 : {
2383 13603368 : mp_ptr r = PTR(R);
2384 13603368 : mp_ptr s1 = PTR(S1);
2385 13603368 : mp_ptr s2 = PTR(S2);
2386 13603368 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2387 : ATTRIBUTE_UNUSED mp_limb_t cy;
2388 :
2389 : ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
2390 : ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
2391 :
2392 13603368 : if (SIZ(S1) != SIZ(S2)) /* S1 and S2 are of different signs */
2393 : {
2394 0 : cy = mpn_add_n (r, s1, s2, n);
2395 : #ifndef MPRESN_NO_ADJUSTMENT
2396 0 : while (cy != 0)
2397 0 : cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
2398 : #endif
2399 0 : SIZ(R) = SIZ(S1);
2400 : }
2401 : else /* same signs, it's a real subtraction */
2402 : {
2403 13603368 : if (mpn_cmp (s1, s2, n) >= 0)
2404 : {
2405 6798705 : mpn_sub_n (r, s1, s2, n); /* no borrow here */
2406 6798705 : SIZ(R) = SIZ(S1);
2407 : }
2408 : else
2409 : {
2410 6804663 : mpn_sub_n (r, s2, s1, n); /* idem */
2411 6804663 : SIZ(R) = -SIZ(S2);
2412 : }
2413 :
2414 : }
2415 13603368 : }
2416 :
2417 : /* (R, T) <- (S1 + S2, S1 - S2)
2418 : Assume R differs from both S1 and S2.
2419 : */
2420 : void
2421 40810104 : mpresn_addsub (mpres_t R, mpres_t T,
2422 : const mpres_t S1, const mpres_t S2, mpmod_t modulus)
2423 : {
2424 40810104 : mp_ptr r = PTR(R);
2425 40810104 : mp_ptr t = PTR(T);
2426 40810104 : mp_ptr s1 = PTR(S1);
2427 40810104 : mp_ptr s2 = PTR(S2);
2428 40810104 : mp_size_t n = ABSIZ(modulus->orig_modulus);
2429 : ATTRIBUTE_UNUSED mp_limb_t cy;
2430 :
2431 : ASSERT (R != S1);
2432 : ASSERT (R != S2);
2433 : ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
2434 : ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
2435 :
2436 40810104 : if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */
2437 : {
2438 29520056 : cy = mpn_add_n (r, s1, s2, n);
2439 : #ifndef MPRESN_NO_ADJUSTMENT
2440 30069824 : while (cy != 0)
2441 549768 : cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
2442 : #endif
2443 29520056 : SIZ(R) = SIZ(S1);
2444 29520056 : if (mpn_cmp (s1, s2, n) >= 0)
2445 : {
2446 11486626 : mpn_sub_n (t, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */
2447 11486626 : SIZ(T) = SIZ(S1);
2448 : }
2449 : else
2450 : {
2451 18033430 : mpn_sub_n (t, s2, s1, n); /* idem since {s2,n} >= {s1,n} */
2452 18033430 : SIZ(T) = -SIZ(S2);
2453 : }
2454 : }
2455 : else /* different signs */
2456 : {
2457 11290048 : if (mpn_cmp (s1, s2, n) >= 0)
2458 : {
2459 5649097 : mpn_sub_n (r, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */
2460 5649097 : SIZ(R) = SIZ(S1);
2461 : }
2462 : else
2463 : {
2464 5640951 : mpn_sub_n (r, s2, s1, n); /* idem since {s2,n} >= {s1,n} */
2465 5640951 : SIZ(R) = SIZ(S2);
2466 : }
2467 11290048 : cy = mpn_add_n (t, s1, s2, n);
2468 : #ifndef MPRESN_NO_ADJUSTMENT
2469 11501656 : while (cy != 0)
2470 211608 : cy -= mpn_sub_n (t, t, PTR(modulus->orig_modulus), n);
2471 : #endif
2472 11290048 : SIZ(T) = SIZ(S1);
2473 : }
2474 40810104 : }
|