LCOV - unnamed - ecm/mpmod.c

LCOV - code coverage report

Current view:	top level - ecm - mpmod.c (source / functions)		Hit	Total	Coverage
Test:	unnamed	Lines:	919	1001	91.8 %
Date:	2022-03-21 11:19:20	Functions:	57	57	100.0 %

          Line data    Source code

       1             : /* Modular multiplication.
       2             : 
       3             : Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
       4             : Paul Zimmermann, Alexander Kruppa and Cyril Bouvier.
       5             : 
       6             : This file is part of the ECM Library.
       7             : 
       8             : The ECM Library is free software; you can redistribute it and/or modify
       9             : it under the terms of the GNU Lesser General Public License as published by
      10             : the Free Software Foundation; either version 3 of the License, or (at your
      11             : option) any later version.
      12             : 
      13             : The ECM Library is distributed in the hope that it will be useful, but
      14             : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15             : or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16             : License for more details.
      17             : 
      18             : You should have received a copy of the GNU Lesser General Public License
      19             : along with the ECM Library; see the file COPYING.LIB.  If not, see
      20             : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21             : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22             : 
      23             : #include <stdio.h>
      24             : #include <stdlib.h>
      25             : #include "ecm-gmp.h"
      26             : #include "ecm-impl.h"
      27             : #include "mpmod.h"
      28             : 
      29             : #ifdef USE_ASM_REDC
      30             :   #include "mulredc.h"
      31             : #endif
      32             : 
      33             : FILE *ECM_STDOUT, *ECM_STDERR; /* define them here since needed in tune.c */
      34             : 
      35             : /* define WANT_ASSERT to check normalization of residues */
      36             : /* #define WANT_ASSERT 1 */
      37             : /* #define DEBUG */
      38             : /* #define WANT_ASSERT_EXPENSIVE 1 */
      39             : 
      40             : #define ASSERT_NORMALIZED(x) ASSERT ((modulus->repr != ECM_MOD_MODMULN && \
      41             :                                       modulus->repr != ECM_MOD_REDC) || \
      42             :                              mpz_size (x) <= mpz_size (modulus->orig_modulus))
      43             : #define MPZ_NORMALIZED(x)    ASSERT (PTR(x)[ABSIZ(x)-1] != 0)
      44             : 
      45             : 
      46             : 
      47             : static void ecm_redc_basecase (mpz_ptr, mpz_ptr, mpmod_t) ATTRIBUTE_HOT;
      48             : static void ecm_mulredc_basecase (mpres_t, const mpres_t, const mpres_t, 
      49             :                                   mpmod_t) ATTRIBUTE_HOT;
      50             : static void base2mod (mpres_t, const mpres_t, mpres_t, mpmod_t) ATTRIBUTE_HOT;
      51             : static void REDC (mpres_t, const mpres_t, mpz_t, mpmod_t);
      52             : 
      53             : /* returns +/-l if n is a factor of N = 2^l +/- 1 with N <= n^threshold, 
      54             :    0 otherwise.
      55             : */
      56             : int 
      57        1590 : isbase2 (const mpz_t n, const double threshold)
      58             : {
      59             :   unsigned int k, lo; 
      60        1590 :   int res = 0; 
      61             :   mpz_t u, w;
      62             : 
      63        1590 :   mpz_init (u);
      64        1590 :   mpz_init (w);
      65        1590 :   lo = mpz_sizeinbase (n, 2) - 1; /* 2^lo <= n < 2^(lo+1) */  
      66        1590 :   mpz_set_ui (u, 1UL);
      67        1590 :   mpz_mul_2exp (u, u, 2UL * lo);
      68        1590 :   mpz_mod (w, u, n); /* 2^(2lo) mod n = -/+2^(2lo-l) if m*n = 2^l+/-1 */
      69        1590 :   if (mpz_cmp_ui (w, 1UL) == 0) /* if 2^(2lo) mod n = 1, then n divides
      70             :                                  2^(2lo)-1. If algebraic factors have been
      71             :                                  removed, n divides either 2^lo+1 or 2^lo-1.
      72             :                                  But since n has lo+1 bits, n can only divide
      73             :                                  2^lo+1. More precisely, n must be 2^lo+1. */
      74             :     {
      75             :       /* check that n equals 2^lo+1. Since n divides 2^(2lo)-1, n is odd. */
      76          38 :       if (mpz_scan1 (n, 1UL) != lo)
      77           7 :         lo = 0;
      78          38 :       mpz_clear (w);
      79          38 :       mpz_clear (u);
      80          38 :       return lo;
      81             :     }
      82        1552 :   k = mpz_sizeinbase (w, 2) - 1;
      83             :   /* if w = 2^k then n divides 2^(2*lo-k)-1 */
      84        1552 :   mpz_set_ui (u, 1UL);
      85        1552 :   mpz_mul_2exp (u, u, k);
      86        1552 :   if (mpz_cmp(w, u) == 0) 
      87         119 :     res = k - 2 * lo;
      88             :   else /* if w = -2^k then n divides 2^(2*lo-k)+1 */
      89             :     {
      90        1433 :       mpz_neg (w, w);
      91        1433 :       mpz_mod (w, w, n);
      92        1433 :       k = mpz_sizeinbase (w, 2) - 1;
      93        1433 :       mpz_set_ui (u, 1UL);
      94        1433 :       mpz_mul_2exp (u, u, k);
      95        1433 :       if (mpz_cmp (w, u) == 0) 
      96          57 :         res = 2 * lo - k;
      97             :     }
      98        1552 :   mpz_clear (u);
      99        1552 :   mpz_clear (w);
     100             : 
     101        1552 :   if (abs (res) > (int) (threshold * (double) lo)) 
     102          24 :     res = 0;
     103             : 
     104        1552 :   if (abs (res) < 16)
     105        1400 :     res = 0;
     106             : 
     107        1552 :   return res;
     108             : }
     109             : 
     110             : /* Do base-2 reduction. R must not equal S or t. */
     111             : static void
     112    29668619 : base2mod (mpres_t R, const mpres_t S, mpres_t t, mpmod_t modulus)
     113             : {
     114    29668619 :   unsigned long absbits = abs (modulus->bits);
     115             : 
     116             :   ASSERT (R != S && R != t);
     117    29668619 :   mpz_tdiv_q_2exp (R, S, absbits);
     118    29668619 :   mpz_tdiv_r_2exp (t, S, absbits);
     119    29668619 :   if (modulus->bits < 0)
     120    16326241 :     mpz_add (R, R, t);
     121             :   else
     122    13342378 :     mpz_sub (R, t, R);
     123             : 
     124             :   /* mpz_mod (R, R, modulus->orig_modulus); */
     125    38017414 :   while (mpz_sizeinbase (R, 2) > absbits)
     126             :     {
     127     8348795 :       mpz_tdiv_q_2exp (t, R, absbits);
     128     8348795 :       mpz_tdiv_r_2exp (R, R, absbits);
     129     8348795 :       if (modulus->bits < 0)
     130     6133610 :         mpz_add (R, R, t);
     131             :       else
     132     2215185 :         mpz_sub (R, R, t);
     133             :     }
     134    29668619 : }
     135             : 
     136             : /* Modular reduction modulo the Fermat number 2^m+1. 
     137             :    n = m / GMP_NUMB_BITS. Result is < 2^m+1.
     138             :    FIXME: this does not work with nails.
     139             :    Only copies the data to R if reduction is needed and returns 1 in that 
     140             :    case. If the value in S is reduced already, nothing is done and 0 is 
     141             :    returned. Yes, this is ugly. */
     142             : static int
     143       68053 : base2mod_2 (mpres_t R, const mpres_t S, mp_size_t n, mpz_t modulus)
     144             : {
     145             :   mp_size_t s;
     146             : 
     147       68053 :   s = ABSIZ(S);
     148       68053 :   if (s > n)
     149             :     {
     150       10565 :       if (s == n + 1)
     151             :         {
     152       10565 :           mp_srcptr sp = PTR(S);
     153             :           mp_ptr rp;
     154             :           
     155       10565 :           MPZ_REALLOC (R, s);
     156       10565 :           rp = PTR(R);
     157             : 
     158       10565 :           if ((rp[n] = mpn_sub_1 (rp, sp, n, sp[n])))
     159           0 :             rp[n] = mpn_add_1 (rp, rp, n, rp[n]);
     160       21130 :           MPN_NORMALIZE(rp, s);
     161             :           ASSERT (s <= n || (s == n && rp[n] == 1));
     162       10565 :           SIZ(R) = (SIZ(S) > 0) ? (int) s : (int) -s;
     163             :         }
     164             :       else /* should happen rarely */
     165           0 :         mpz_mod (R, S, modulus);
     166             : 
     167       10565 :       return 1;
     168             :     }
     169             :   
     170       57488 :   return 0;
     171             : }
     172             : 
     173             : /* subquadratic REDC, at mpn level.
     174             :    {orig,n} is the original modulus.
     175             :    Requires xn = 2n or 2n-1 and ABSIZ(orig_modulus)=n.
     176             :  */
     177             : static void
     178   100007046 : ecm_redc_n (mp_ptr rp, mp_srcptr x0p, mp_size_t xn,
     179             :             mp_srcptr orig, mp_srcptr invm, mp_size_t n)
     180             : {
     181             :   mp_ptr tp, up, xp;
     182   100007046 :   mp_size_t nn = n + n;
     183             :   mp_limb_t cy, cin;
     184             :   TMP_DECL;
     185             : 
     186             :   ASSERT((xn == nn) || (xn == nn - 1));
     187             : 
     188             :   TMP_MARK;
     189   100007046 :   up = TMP_ALLOC_LIMBS(nn + nn);
     190   100007046 :   if (xn < nn)
     191             :     {
     192           0 :       xp = TMP_ALLOC_LIMBS(nn);
     193           0 :       MPN_COPY (xp, x0p, xn);
     194           0 :       xp[nn - 1] = 0;
     195             :     }
     196             :   else
     197   100007046 :     xp = (mp_ptr) x0p;
     198             : #ifdef HAVE___GMPN_MULLO_N /* available up from GMP 5.0.0 */
     199   100007046 :   __gmpn_mullo_n (up, xp, invm, n);
     200             : #else
     201             :   ecm_mul_lo_n (up, xp, invm, n);
     202             : #endif
     203   100007046 :   tp = up + nn;
     204   100007046 :   mpn_mul_n (tp, up, orig, n);
     205             :   /* add {x, 2n} and {tp, 2n}. We know that {tp, n} + {xp, n} will give
     206             :      either 0, or a carry out. If xp[n-1] <> 0 or tp[n-1] <> 0, 
     207             :      then there is a carry. We use a binary OR, which sets the zero flag
     208             :      if and only if both operands are zero. */
     209   100007046 :   cin = (mp_limb_t) ((xp[n - 1] | tp[n - 1]) ? 1 : 0);
     210             : #ifdef HAVE___GMPN_ADD_NC
     211   100007046 :   cy = __gmpn_add_nc (rp, tp + n, xp + n, n, cin);
     212             : #else
     213             :   cy = mpn_add_n (rp, tp + n, xp + n, n);
     214             :   cy += mpn_add_1 (rp, rp, n, cin);
     215             : #endif
     216             :   /* since we add at most N-1 to the upper half of {x0p,2n},
     217             :      one adjustment is enough */
     218   100007046 :   if (cy)
     219     3372889 :     cy -= mpn_sub_n (rp, rp, orig, n);
     220             :   ASSERT (cy == 0);
     221             :   TMP_FREE;
     222   100007046 : }
     223             : 
     224             : /* REDC. x and t must not be identical, t has limb growth */
     225             : /* subquadratic REDC, at mpz level */
     226             : static void 
     227   380122664 : REDC (mpres_t r, const mpres_t x, mpz_t t, mpmod_t modulus)
     228             : {
     229   380122664 :   mp_size_t n = modulus->bits / GMP_NUMB_BITS;
     230   380122664 :   mp_size_t xn = ABSIZ(x);
     231             : 
     232             :   ASSERT (xn <= 2 * n);
     233   380122664 :   if (xn == 2 * n) /* ecm_redc_n also accepts xn=2n-1, but this seems slower
     234             :                     for now (see remark in TODO) */
     235             :     {
     236             :       mp_ptr rp;
     237    93019051 :       MPZ_REALLOC (r, n);
     238    93019051 :       rp = PTR(r);
     239    93019051 :       ecm_redc_n (rp, PTR(x), xn, PTR(modulus->orig_modulus),
     240    93019051 :                   PTR(modulus->aux_modulus), n);
     241    93019061 :       MPN_NORMALIZE(rp, n);
     242    93019051 :       SIZ(r) = (SIZ(x) > 0) ? (int) n : (int) -n;
     243             :       MPZ_NORMALIZED (r);
     244             :     }
     245             :   else
     246             :     {
     247   287103613 :       mpz_tdiv_r_2exp (t, x, modulus->bits);
     248   287103613 :       mpz_mul (t, t, modulus->aux_modulus);
     249   287103613 :       mpz_tdiv_r_2exp (t, t, modulus->bits);  /* t = (x % R) * 1/N (mod R) */
     250   287103613 :       mpz_mul (t, t, modulus->orig_modulus);  /* t <= (R-1)*N */
     251   287103613 :       mpz_add (t, t, x);                      /* t < (R-1)*N + R^2/B */
     252   287103613 :       mpz_tdiv_q_2exp (r, t, modulus->bits);  /* r = (x + m*N) / R
     253             :                                                  < N + R/B */
     254   287103613 :       if (ABSIZ (r) > n) /* this can happen in practice but is extremely
     255             :                             unlikely: in particular it requires that the
     256             :                             upper limb of N has only ones */
     257           0 :         mpz_sub (r, r, modulus->multiple);
     258             :     }
     259             :   ASSERT (ABSIZ(r) <= n);
     260   380122664 : }
     261             : 
     262             : 
     263             : /* Quadratic time redc for n word moduli. */
     264             : static inline void 
     265    15346253 : redc_basecase_n (mp_ptr rp, mp_ptr cp, mp_srcptr np, const mp_size_t nn, 
     266             :                  const mp_ptr invm)
     267             : {
     268             : #ifdef HAVE___GMPN_REDC_2
     269    15346253 :   REDC2(rp, cp, np, nn, invm);
     270             : #else /* HAVE___GMPN_REDC_2 is not defined */
     271             : #ifdef HAVE___GMPN_REDC_1
     272             :   REDC1(rp, cp, np, nn, invm[0]);
     273             : #else /* neither HAVE___GMPN_REDC_2 nor HAVE___GMPN_REDC_1 is defined */
     274             :   mp_limb_t cy;
     275             :   mp_size_t j;
     276             :   
     277             :   for (j = 0; j < nn; j++)
     278             :     {
     279             :       cy = mpn_addmul_1 (cp, np, nn, cp[0] * invm[0]);
     280             :       ASSERT(cp[0] == (mp_limb_t) 0);
     281             :       cp[0] = cy;
     282             :       cp++;
     283             :     }
     284             :   /* add vector of carries and shift */
     285             :   cy = mpn_add_n (rp, cp, cp - nn, nn);
     286             :   /* the result of Montgomery's REDC is less than 2^Nbits + N,
     287             :      thus at most one correction is enough */
     288             :   if (cy != 0)
     289             :     {
     290             :       mp_limb_t t;
     291             :       t = mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
     292             :       ASSERT (t == 1);
     293             :     }
     294             : #endif /* HAVE___GMPN_REDC_1 */
     295             : #endif /* HAVE___GMPN_REDC_2 */
     296    15346253 : }
     297             : 
     298             : /* r <- c/R^nn mod n, where n has nn limbs, and R=2^GMP_NUMB_BITS.
     299             :    n must be odd.
     300             :    c must have space for at least 2*nn limbs.
     301             :    r must have space for at least n limbs.
     302             :    c and r can be the same variable.
     303             :    The data in c is clobbered.
     304             : */
     305             : static void 
     306    15346253 : ecm_redc_basecase (mpz_ptr r, mpz_ptr c, mpmod_t modulus)
     307             : {
     308             :   mp_ptr rp;
     309             :   mp_ptr cp;
     310             :   mp_srcptr np;
     311    15346253 :   mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
     312             : 
     313             :   ASSERT(ABSIZ(c) <= 2 * nn);
     314             :   ASSERT(ALLOC(c) >= 2 * nn);
     315             :   ASSERT(ALLOC(r) >= nn);
     316    15346253 :   cp = PTR(c);
     317    15346253 :   rp = PTR(r);
     318    15346253 :   np = PTR(modulus->orig_modulus);
     319    72703007 :   for (j = ABSIZ(c); j < 2 * nn; j++) 
     320    57356754 :     cp[j] = 0;
     321             :   
     322    15346253 :   redc_basecase_n (rp, cp, np, nn, modulus->Nprim);
     323             : 
     324    15695134 :   MPN_NORMALIZE (rp, nn);
     325    15346253 :   SIZ(r) = SIZ(c) < 0 ? (int) -nn : (int) nn;
     326    15346253 : }
     327             : 
     328             : #ifdef USE_ASM_REDC
     329             : /* Quadratic time multiplication and REDC with nn-limb modulus.
     330             :    x and y are nn-limb residues, the nn-limb result is written to z. 
     331             :    This function merely calls the correct mulredc*() assembly function
     332             :    depending on nn, and processes any leftover carry. */
     333             : 
     334             : static void
     335  5792701534 : mulredc (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_srcptr m, 
     336             :          const mp_size_t nn, const mp_limb_t invm)
     337             : {
     338             :   mp_limb_t cy;
     339  5792701534 :   switch (nn) 
     340             :     {
     341   112427488 :       case 1:
     342   112427488 :         cy = mulredc1(z, x[0], y[0], m[0], invm);
     343   112427488 :         break;
     344  1365842989 :       case 2:
     345  1365842989 :         cy = mulredc2(z, x, y, m, invm);
     346  1365842989 :         break;
     347  3987645064 :       case 3:
     348  3987645064 :         cy = mulredc3(z, x, y, m, invm);
     349  3987645064 :         break;
     350    27876146 :       case 4:
     351    27876146 :         cy = mulredc4(z, x, y, m, invm);
     352    27876146 :         break;
     353   165038451 :       case 5: 
     354   165038451 :         cy = mulredc5(z, x, y, m, invm);
     355   165038451 :         break;
     356    43877059 :       case 6: 
     357    43877059 :         cy = mulredc6(z, x, y, m, invm);
     358    43877059 :         break;
     359    14966255 :       case 7: 
     360    14966255 :         cy = mulredc7(z, x, y, m, invm);
     361    14966255 :         break;
     362    20153404 :       case 8:
     363    20153404 :         cy = mulredc8(z, x, y, m, invm);
     364    20153404 :         break;
     365     2255154 :       case 9:
     366     2255154 :         cy = mulredc9(z, x, y, m, invm);
     367     2255154 :         break;
     368     4341894 :       case 10:
     369     4341894 :         cy = mulredc10(z, x, y, m, invm);
     370     4341894 :         break;
     371     1882650 :       case 11:
     372     1882650 :         cy = mulredc11(z, x, y, m, invm);
     373     1882650 :         break;
     374     1082076 :       case 12:
     375     1082076 :         cy = mulredc12(z, x, y, m, invm);
     376     1082076 :         break;
     377      147589 :       case 13:
     378      147589 :         cy = mulredc13(z, x, y, m, invm);
     379      147589 :         break;
     380    42685654 :       case 14:
     381    42685654 :         cy = mulredc14(z, x, y, m, invm);
     382    42685654 :         break;
     383      159339 :       case 15:
     384      159339 :         cy = mulredc15(z, x, y, m, invm);
     385      159339 :         break;
     386      153379 :       case 16:
     387      153379 :         cy = mulredc16(z, x, y, m, invm);
     388      153379 :         break;
     389      140797 :       case 17:
     390      140797 :         cy = mulredc17(z, x, y, m, invm);
     391      140797 :         break;
     392      154383 :       case 18:
     393      154383 :         cy = mulredc18(z, x, y, m, invm);
     394      154383 :         break;
     395      682389 :       case 19:
     396      682389 :         cy = mulredc19(z, x, y, m, invm);
     397      682389 :         break;
     398     1189374 :       case 20:
     399     1189374 :         cy = mulredc20(z, x, y, m, invm);
     400     1189374 :         break;
     401           0 :       default:
     402           0 :         abort();
     403             :     }
     404             :   /* the result of Montgomery's REDC is less than 2^Nbits + N,
     405             :      thus at most one correction is enough */
     406  5792701534 :   if (cy != 0)
     407             :     {
     408             :       ATTRIBUTE_UNUSED mp_limb_t t;
     409    11006143 :       t = mpn_sub_n (z, z, m, nn); /* a borrow should always occur here */
     410             :       ASSERT (t == 1);
     411             :     }
     412  5792701534 : }
     413             : 
     414             : #if 0
     415             : /* {rp, n} <- {ap, n}^2/B^n mod {np, n} where B = 2^GMP_NUMB_BITS */
     416             : static void
     417             : sqrredc (mp_ptr rp, mp_srcptr ap, mp_srcptr np, const mp_size_t n,
     418             :          const mp_limb_t invm)
     419             : {
     420             :   mp_ptr cp;
     421             :   mp_size_t i;
     422             :   mp_limb_t cy, q;
     423             :   TMP_DECL;
     424             : 
     425             :   TMP_MARK;
     426             :   cp = TMP_ALLOC_LIMBS(2*n);
     427             :   for (i = 0; i < n; i++)
     428             :     umul_ppmm (cp[2*i+1], cp[2*i], ap[i], ap[i]);
     429             : 
     430             :   if (UNLIKELY(n == 1))
     431             :     {
     432             :       q = cp[0] * invm;
     433             :       rp[0] = mpn_addmul_1 (cp, np, 1, q);
     434             :       cy = mpn_add_n (rp, rp, cp + 1, 1);
     435             :       goto end_sqrredc;
     436             :     }
     437             : 
     438             :   if (cp[0] & (mp_limb_t) 1)
     439             :     /* cp[n] is either some ap[i]^2 mod B or floor(ap[i]^2/B),
     440             :        the latter is at most floor((B-1)^2/B) = B-2, and the former cannot be
     441             :        B-1 since -1 is not a square mod 2^n for n >1, thus there is no carry
     442             :        in cp[n] + ... below */
     443             :     cp[n] += mpn_add_n (cp, cp, np, n);
     444             :   /* now {cp, 2n} is even: divide by two */
     445             :   mpn_rshift (cp, cp, 2*n, 1);
     446             :   /* now cp[2n-1] is at most B/2-1 */
     447             : 
     448             :   for (i = 0; i < n - 1; i++)
     449             :     {
     450             :       q = cp[i] * invm;
     451             :       cp[i] = mpn_addmul_1 (cp + i, np, n, q);
     452             :       /* accumulate ap[i+1..n-1] * ap[i] */
     453             :       rp[i] = mpn_addmul_1 (cp + 2 * i + 1, ap + i + 1, n - 1 - i, ap[i]);
     454             :     }
     455             :   /* the last iteration did set cp[n-2] to zero, accumulated a[n-1] * a[n-2] */
     456             : 
     457             :   /* cp[2n-1] was untouched so far, so it is still at most B/2-1 */
     458             :   q = cp[n-1] * invm;
     459             :   rp[n-1] = mpn_addmul_1 (cp + n - 1, np, n, q);
     460             :   /* rp[n-1] <= floor((B^n-1)*(B-1)/B^n)<=B-2 */
     461             : 
     462             :   /* now add {rp, n}, {cp+n, n} and {cp, n-1} */
     463             :   /* cp[2n-1] still <= B/2-1 */
     464             :   rp[n-1] += mpn_add_n (rp, rp, cp, n-1); /* no overflow in rp[n-1] + ... */
     465             :   cy = mpn_add_n (rp, rp, cp + n, n);
     466             :   /* multiply by 2 */
     467             :   cy = (cy << 1) + mpn_lshift (rp, rp, n, 1);
     468             :  end_sqrredc:
     469             :   while (cy)
     470             :     cy -= mpn_sub_n (rp, rp, np, n);
     471             :   TMP_FREE;
     472             : }
     473             : #endif
     474             : 
     475             : #ifdef HAVE_NATIVE_MULREDC1_N
     476             : /* Multiplies y by the 1-limb value of x and does modulo reduction.
     477             :    The resulting residue may be multiplied by some constant, 
     478             :    which makes this function useful only for cases where, e.g.,
     479             :    all projective coordinates are multiplied by the same constant.
     480             :    More precisely it computes:
     481             :    {z, N} = {y, N} * x / 2^GMP_NUMB_BITS mod {m, N}
     482             : */
     483             : static void
     484    12724368 : mulredc_1 (mp_ptr z, const mp_limb_t x, mp_srcptr y, mp_srcptr m, 
     485             :            const mp_size_t N, const mp_limb_t invm)
     486             : {
     487             :   mp_limb_t cy;
     488             : 
     489    12724368 :   switch (N) {
     490      519496 :    case 1:
     491      519496 :     cy = mulredc1(z, x, y[0], m[0], invm);
     492      519496 :     break;
     493      381000 :    case 2:
     494      381000 :     cy = mulredc1_2(z, x, y, m, invm);
     495      381000 :     break;
     496           0 :    case 3:
     497           0 :     cy = mulredc1_3(z, x, y, m, invm);
     498           0 :     break;
     499      115568 :    case 4:
     500      115568 :     cy = mulredc1_4(z, x, y, m, invm);
     501      115568 :     break;
     502           0 :    case 5: 
     503           0 :     cy = mulredc1_5(z, x, y, m, invm);
     504           0 :     break;
     505       44520 :    case 6: 
     506       44520 :     cy = mulredc1_6(z, x, y, m, invm);
     507       44520 :     break;
     508           0 :    case 7: 
     509           0 :     cy = mulredc1_7(z, x, y, m, invm);
     510           0 :     break;
     511           0 :    case 8:
     512           0 :     cy = mulredc1_8(z, x, y, m, invm);
     513           0 :     break;
     514      127000 :    case 9:
     515      127000 :     cy = mulredc1_9(z, x, y, m, invm);
     516      127000 :     break;
     517           0 :    case 10:
     518           0 :     cy = mulredc1_10(z, x, y, m, invm);
     519           0 :     break;
     520           0 :    case 11:
     521           0 :     cy = mulredc1_11(z, x, y, m, invm);
     522           0 :     break;
     523           0 :    case 12:
     524           0 :     cy = mulredc1_12(z, x, y, m, invm);
     525           0 :     break;
     526           0 :    case 13:
     527           0 :     cy = mulredc1_13(z, x, y, m, invm);
     528           0 :     break;
     529    11536784 :    case 14:
     530    11536784 :     cy = mulredc1_14(z, x, y, m, invm);
     531    11536784 :     break;
     532           0 :    case 15:
     533           0 :     cy = mulredc1_15(z, x, y, m, invm);
     534           0 :     break;
     535           0 :    case 16:
     536           0 :     cy = mulredc1_16(z, x, y, m, invm);
     537           0 :     break;
     538           0 :    case 17:
     539           0 :     cy = mulredc1_17(z, x, y, m, invm);
     540           0 :     break;
     541           0 :    case 18:
     542           0 :     cy = mulredc1_18(z, x, y, m, invm);
     543           0 :     break;
     544           0 :    case 19:
     545           0 :     cy = mulredc1_19(z, x, y, m, invm);
     546           0 :     break;
     547           0 :    case 20:
     548           0 :     cy = mulredc1_20(z, x, y, m, invm);
     549           0 :     break;
     550           0 :    default:
     551             :     {
     552           0 :       abort ();
     553             :     }
     554             :   }
     555             :   /* the result of Montgomery's REDC is less than 2^Nbits + N,
     556             :      thus one correction (at most) is enough */
     557    12724368 :   if (cy != 0)
     558             :     {
     559             :       ATTRIBUTE_UNUSED mp_limb_t t;
     560       17488 :       t = mpn_sub_n (z, z, m, N); /* a borrow should always occur here */
     561             :       ASSERT (t == 1);
     562             :     }
     563    12724368 : }
     564             : #endif /* ifdef HAVE_NATIVE_MULREDC1_N */
     565             : #endif
     566             : 
     567             : static int tune_mulredc_table[] = TUNE_MULREDC_TABLE;
     568             : static int tune_sqrredc_table[] = TUNE_SQRREDC_TABLE;
     569             : 
     570             : static void 
     571  6305443576 : ecm_mulredc_basecase_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, 
     572             :                         mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp)
     573             : {
     574             :   mp_limb_t cy;
     575             :   mp_size_t j;
     576             : 
     577  6305443576 :   if (nn <= MULREDC_ASSEMBLY_MAX)
     578             :     {
     579  6299991154 :       switch (tune_mulredc_table[nn])
     580             :         {
     581  5447234359 :         case MPMOD_MULREDC: /* use quadratic assembly mulredc */
     582             : #ifdef USE_ASM_REDC
     583  5447234359 :           mulredc (rp, s1p, s2p, np, nn, invm[0]);
     584  5447234359 :           break;
     585             : #endif /* otherwise go through to the next available mode */
     586   415286367 :         case MPMOD_MUL_REDC1: /* mpn_mul_n + __gmpn_redc_1 */
     587             : #ifdef HAVE___GMPN_REDC_1
     588   415286367 :           mpn_mul_n (tmp, s1p, s2p, nn);
     589   415286367 :           REDC1(rp, tmp, np, nn, invm[0]);
     590   415286367 :           break;
     591             : #endif /* otherwise go through to the next available mode */
     592   218735214 :         case MPMOD_MUL_REDC2: /* mpn_mul_n + __gmpn_redc_2 */
     593             : #ifdef HAVE___GMPN_REDC_2
     594   218735214 :           mpn_mul_n (tmp, s1p, s2p, nn);
     595   218735214 :           REDC2(rp, tmp, np, nn, invm);
     596   218735214 :           break;
     597             : #endif /* otherwise go through to the next available mode */
     598   218735214 :         case MPMOD_MUL_REDCN: /* mpn_mul_n + __gmpn_redc_n */
     599             :         /* disable redc_n for now, since it uses the opposite
     600             :            precomputed inverse wrt redc_1 and redc_2 */
     601             : #ifdef HAVE___GMPN_REDC_Nxxx
     602             :           mpn_mul_n (tmp, s1p, s2p, nn);
     603             :           __gmpn_redc_n (rp, tmp, np, nn, invm);
     604             :           break;
     605             : #endif /* otherwise go through to the next available mode */
     606             :         case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */
     607   218735214 :           mpn_mul_n (tmp, s1p, s2p, nn);
     608   958296976 :           for (j = 0; j < nn; j++, tmp++)
     609   739561762 :             tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]);
     610   218735214 :           cy = mpn_add_n (rp, tmp - nn, tmp, nn);
     611   218735214 :           if (cy != 0)
     612      829852 :             mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
     613   218735214 :           break;
     614           0 :         default:
     615             :           {
     616           0 :             outputf (OUTPUT_ERROR, "Invalid mulredc mode: %d\n",
     617             :                      tune_mulredc_table[nn]);
     618           0 :             exit (EXIT_FAILURE);
     619             :           }
     620             :         }
     621             :     }
     622             :   else /* nn > MULREDC_ASSEMBLY_MAX */
     623             :     {
     624     5452422 :       mpn_mul_n (tmp, s1p, s2p, nn);
     625     5452422 :       ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn);
     626             :     }
     627  6305443576 : }
     628             : 
     629             : static void 
     630  1498234598 : ecm_sqrredc_basecase_n (mp_ptr rp, mp_srcptr s1p,
     631             :                         mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp)
     632             : {
     633             :   mp_limb_t cy;
     634             :   mp_size_t j;
     635             : 
     636  1498234598 :   if (nn <= MULREDC_ASSEMBLY_MAX)
     637             :     {
     638  1496699025 :       switch (tune_sqrredc_table[nn])
     639             :         {
     640   345467175 :         case MPMOD_MULREDC: /* use quadratic assembly mulredc */
     641             : #ifdef USE_ASM_REDC
     642   345467175 :           mulredc (rp, s1p, s1p, np, nn, invm[0]);
     643   345467175 :           break;
     644             : #endif /* otherwise go through to the next available mode */
     645   901470860 :         case MPMOD_MUL_REDC1: /* mpn_sqr + __gmpn_redc_1 */
     646             : #ifdef HAVE___GMPN_REDC_1
     647   901470860 :           mpn_sqr (tmp, s1p, nn);
     648   901470860 :           REDC1(rp, tmp, np, nn, invm[0]);
     649   901470860 :           break;
     650             : #endif /* otherwise go through to the next available mode */
     651   137662930 :         case MPMOD_MUL_REDC2: /* mpn_sqr + __gmpn_redc_2 */
     652             : #ifdef HAVE___GMPN_REDC_2
     653   137662930 :           mpn_sqr (tmp, s1p, nn);
     654   137662930 :           REDC2(rp, tmp, np, nn, invm);
     655   137662930 :           break;
     656             : #endif /* otherwise go through to the next available mode */
     657   112098060 :         case MPMOD_MUL_REDCN: /* mpn_sqr + __gmpn_redc_n */
     658             :         /* disable redc_n for now, since it uses the opposite
     659             :            precomputed inverse wrt redc_1 and redc_2 */
     660             : #ifdef HAVE___GMPN_REDC_Nxxx
     661             :           mpn_sqr (tmp, s1p, nn);
     662             :           __gmpn_redc_n (rp, tmp, np, nn, invm);
     663             :           break;
     664             : #endif /* otherwise go through to the next available mode */
     665             :         case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */
     666   112098060 :           mpn_sqr (tmp, s1p, nn);
     667   521495067 :           for (j = 0; j < nn; j++, tmp++)
     668   409397007 :             tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]);
     669   112098060 :           cy = mpn_add_n (rp, tmp - nn, tmp, nn);
     670   112098060 :           if (cy != 0)
     671     1529587 :             mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
     672   112098060 :           break;
     673           0 :         default:
     674             :           {
     675           0 :             outputf (OUTPUT_ERROR, "Invalid sqrredc mode: %d\n",
     676             :                      tune_sqrredc_table[nn]);
     677           0 :             exit (EXIT_FAILURE);
     678             :           }
     679             :         }
     680             :     }
     681             :   else /* nn > MULREDC_ASSEMBLY_MAX */
     682             :     {
     683     1535573 :       mpn_sqr (tmp, s1p, nn);
     684     1535573 :       ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn);
     685             :     }
     686  1498234598 : }
     687             : 
     688             : /* R <- S1 * S2 mod modulus
     689             :    i.e. R <- S1*S2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS.
     690             :    Same as ecm_redc_basecase previous, but combined with mul
     691             :    Neither input argument must be in modulus->temp1
     692             : */
     693             : static void 
     694  6250151104 : ecm_mulredc_basecase (mpres_t R, const mpres_t S1, const mpres_t S2, 
     695             :                       mpmod_t modulus)
     696             : {
     697  6250151104 :   mp_ptr s1p, s2p, rp = PTR(R);
     698  6250151104 :   mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
     699             : 
     700             :   ASSERT(ALLOC(R) >= nn);
     701             :   ASSERT(ALLOC(S1) >= nn);
     702             :   ASSERT(ALLOC(S2) >= nn);
     703  6250151104 :   s1p = PTR(S1);
     704  6250151104 :   s2p = PTR(S2);
     705             :   /* FIXME: S1 and S2 are input and marked const, we mustn't write to them */
     706  6338645174 :   for (j = ABSIZ(S1); j < nn; j++) 
     707    88494070 :     s1p[j] = 0;
     708  6321664022 :   for (j = ABSIZ(S2); j < nn; j++) 
     709    71512918 :     s2p[j] = 0;
     710             : 
     711  6250151104 :   ecm_mulredc_basecase_n (rp, s1p, s2p, PTR(modulus->orig_modulus), nn,
     712             :                           modulus->Nprim, PTR(modulus->temp1));
     713             : 
     714  6359618808 :   MPN_NORMALIZE (rp, nn);
     715  6250151104 :   SIZ(R) = (SIZ(S1)*SIZ(S2)) < 0 ? (int) -nn : (int) nn;
     716  6250151104 : }
     717             : 
     718             : /* R <- S1^2 mod modulus
     719             :    i.e. R <- S1^2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS.
     720             :    Same as ecm_redc_basecase previous, but combined with sqr
     721             :    The input argument must not be in modulus->temp1 */
     722             : static void 
     723  1443821126 : ecm_sqrredc_basecase (mpres_t R, const mpres_t S1, mpmod_t modulus)
     724             : {
     725             :   mp_ptr rp;
     726             :   mp_ptr s1p;
     727  1443821126 :   mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
     728             : 
     729             :   ASSERT(ALLOC(R) >= nn);
     730             :   ASSERT(ALLOC(S1) >= nn);
     731  1443821126 :   rp = PTR(R);
     732  1443821126 :   s1p = PTR(S1);
     733             :   /* FIXME: S1 is input and marked const, we mustn't write to it */
     734  1481815739 :   for (j = ABSIZ(S1); j < nn; j++)
     735    37994613 :     s1p[j] = 0;
     736             : 
     737  1443821126 :   ecm_sqrredc_basecase_n (rp, s1p, PTR(modulus->orig_modulus), nn,
     738             :                           modulus->Nprim, PTR(modulus->temp1));
     739             : 
     740  1484319059 :   MPN_NORMALIZE (rp, nn);
     741  1443821126 :   SIZ(R) = (int) nn;
     742  1443821126 : }
     743             : 
     744             : /* If the user asked for a particular representation, always use it.
     745             :    If repr = ECM_MOD_DEFAULT, use the thresholds.
     746             :    Don't use base2 if repr = ECM_MOD_NOBASE2.
     747             :    If a value is <= -16 or >= 16, it is a base2 exponent.
     748             :    Return a non-zero value if an error occurred.
     749             : */
     750             : int
     751        2511 : mpmod_init (mpmod_t modulus, const mpz_t N, int repr)
     752             : {
     753        2511 :   int base2 = 0, r = 0;
     754        2511 :   mp_size_t n = mpz_size (N);
     755             : 
     756        2511 :   switch (repr)
     757             :     {
     758        1479 :     case ECM_MOD_DEFAULT:
     759        1479 :       if ((base2 = isbase2 (N, BASE2_THRESHOLD)))
     760             :         {
     761         167 :           repr = ECM_MOD_BASE2;
     762         167 :           break;
     763             :         }
     764             :       /* else go through */
     765             : #if defined( __GNUC__ ) && __GNUC__ >= 7 && !defined(__ICC)
     766             :       __attribute__ ((fallthrough));
     767             : #endif
     768             :     case ECM_MOD_NOBASE2:
     769        1339 :       if (mpz_size (N) < MPZMOD_THRESHOLD)
     770        1328 :         repr = ECM_MOD_MODMULN;
     771          11 :       else if (mpz_size (N) < REDC_THRESHOLD)
     772          11 :         repr = ECM_MOD_MPZ;
     773             :       else
     774           0 :         repr = ECM_MOD_REDC;
     775             :     }
     776             : 
     777             :   /* now repr is {ECM_MOD_BASE2, ECM_MOD_MODMULN, ECM_MOD_MPZ, ECM_MOD_REDC},
     778             :      or |repr| >= 16. */
     779             : 
     780        2511 :   switch (repr)
     781             :     {
     782         343 :     case ECM_MOD_MPZ:
     783         343 :       outputf (OUTPUT_VERBOSE, "Using mpz_mod\n");
     784         343 :       mpmod_init_MPZ (modulus, N);
     785         343 :       break;
     786        1739 :     case ECM_MOD_MODMULN:
     787        1739 :       outputf (OUTPUT_VERBOSE, "Using MODMULN [mulredc:%d, sqrredc:%d]\n",
     788             :                (n <= MULREDC_ASSEMBLY_MAX) ? tune_mulredc_table[n] : 4,
     789             :                (n <= MULREDC_ASSEMBLY_MAX) ? tune_sqrredc_table[n] : 4);
     790        1739 :       mpmod_init_MODMULN (modulus, N);
     791        1739 :       break;
     792         242 :     case ECM_MOD_REDC:
     793         242 :       outputf (OUTPUT_VERBOSE, "Using REDC\n");
     794         242 :       mpmod_init_REDC (modulus, N);
     795         242 :       break;
     796         187 :     default: /* base2 case: either repr=ECM_MOD_BASE2, and base2 was
     797             :                 determined above, or |repr| >= 16, and we want base2 = repr */
     798         187 :       if (repr != ECM_MOD_BASE2)
     799          20 :         base2 = repr;
     800         187 :       r = mpmod_init_BASE2 (modulus, base2, N);
     801         187 :       break;
     802             :     }
     803             : 
     804        2511 :   return r;
     805             : }
     806             : 
     807             : void
     808     2890706 : mpres_clear (mpres_t a, ATTRIBUTE_UNUSED const mpmod_t modulus) 
     809             : {
     810     2890706 :   mpz_clear (a);
     811     2890706 :   PTR(a) = NULL; /* Make sure we segfault if we access it again */
     812     2890706 : }
     813             : 
     814             : void 
     815         424 : mpmod_init_MPZ (mpmod_t modulus, const mpz_t N)
     816             : {
     817             :   size_t n;
     818             : 
     819         424 :   mpz_init_set (modulus->orig_modulus, N);
     820         424 :   modulus->repr = ECM_MOD_MPZ;
     821             :   
     822         424 :   n = mpz_size (N); /* number of limbs of N */
     823         424 :   modulus->bits = n * GMP_NUMB_BITS; /* Number of bits, 
     824             :                                         rounded up to full limb */
     825             : 
     826         424 :   mpz_init2 (modulus->temp1, 2UL * modulus->bits + GMP_NUMB_BITS);
     827         424 :   mpz_init2 (modulus->temp2, modulus->bits);
     828         424 :   mpz_init2 (modulus->aux_modulus, modulus->bits);
     829         424 :   mpz_set_ui (modulus->aux_modulus, 1UL);
     830             :   /* we precompute B^(n + ceil(n/2)) mod N, where B=2^GMP_NUMB_BITS */
     831         424 :   mpz_mul_2exp (modulus->aux_modulus, modulus->aux_modulus,
     832         424 :                 (n + (n + 1) / 2) * GMP_NUMB_BITS);
     833         424 :   mpz_mod (modulus->aux_modulus, modulus->aux_modulus, N);
     834             : 
     835         424 :   return;
     836             : }
     837             : 
     838             : int 
     839         187 : mpmod_init_BASE2 (mpmod_t modulus, const int base2, const mpz_t N)
     840             : {
     841             :   int Nbits;
     842             :   
     843         187 :   outputf (OUTPUT_VERBOSE,
     844             :            "Using special division for factor of 2^%d%c1\n",
     845             :            abs (base2), (base2 < 0) ? '-' : '+');
     846         187 :   mpz_init_set (modulus->orig_modulus, N);
     847         187 :   modulus->repr = ECM_MOD_BASE2;
     848         187 :   modulus->bits = base2;
     849             : 
     850         187 :   Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded
     851             :                                            up to full limb */
     852             : 
     853         187 :   mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS);
     854         187 :   mpz_init2 (modulus->temp2, Nbits);
     855             :   
     856         187 :   mpz_set_ui (modulus->temp1, 1UL);
     857         187 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, abs (base2));
     858         187 :   if (base2 < 0)
     859         123 :     mpz_sub_ui (modulus->temp1, modulus->temp1, 1UL);
     860             :   else
     861          64 :     mpz_add_ui (modulus->temp1, modulus->temp1, 1UL);
     862         187 :   if (!mpz_divisible_p (modulus->temp1, N))
     863             :     {
     864          10 :        outputf (OUTPUT_ERROR, "mpmod_init_BASE2: n does not divide 2^%d%c1\n",
     865             :                 abs (base2), base2 < 0 ? '-' : '+');
     866          10 :        mpz_clear (modulus->temp2);
     867          10 :        mpz_clear (modulus->temp1);
     868          10 :        mpz_clear (modulus->orig_modulus);
     869          10 :        return ECM_ERROR;
     870             :     }
     871             :   
     872         177 :   modulus->Fermat = 0;
     873         177 :   if (base2 > 0)
     874             :     {
     875             :       unsigned long i;
     876         204 :       for (i = base2; (i & 1) == 0; i >>= 1);
     877          54 :       if (i == 1)
     878             :         {
     879          22 :           modulus->Fermat = base2;
     880             :         }
     881             :     }
     882             :   
     883         177 :   return 0;
     884             : }
     885             : 
     886             : /* initialize the following fields:
     887             :    orig_modulus - the original modulus
     888             :    bits         - # of bits of N, rounded up to a multiple of GMP_NUMB_BITS
     889             :    temp1, temp2 - auxiliary variables
     890             :    Nprim        - -1/N mod B^n where B=2^GMP_NUMB_BITS and n = #limbs(N)
     891             :    R2           - (2^bits)^2 (mod N)
     892             :    R3           - (2^bits)^3 (mod N)
     893             :    multiple     - smallest multiple of N >= 2^bits
     894             :  */
     895             : void
     896        1766 : mpmod_init_MODMULN (mpmod_t modulus, const mpz_t N)
     897             : {
     898             :   int Nbits;
     899             : 
     900        1766 :   mpz_init_set (modulus->orig_modulus, N);
     901             :   
     902        1766 :   modulus->repr = ECM_MOD_MODMULN;
     903        1766 :   Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded
     904             :                                            up to full limb */
     905        1766 :   modulus->bits = Nbits;
     906             : 
     907        1766 :   mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS);
     908        1766 :   mpz_init2 (modulus->temp2, Nbits + 1);
     909        1766 :   modulus->Nprim = (mp_limb_t*) malloc (mpz_size (N) * sizeof (mp_limb_t));
     910             : 
     911        1766 :   mpz_init2 (modulus->R2, Nbits);
     912        1766 :   mpz_set_ui (modulus->temp1, 1UL);
     913        1766 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits);
     914        1766 :   mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus);
     915             :   /* Now R2 = (2^bits)^2 (mod N) */
     916             :   
     917        1766 :   mpz_init2 (modulus->R3, Nbits);
     918        1766 :   mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits);
     919        1766 :   mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus);
     920             :   /* Now R3 = (2^bits)^3 (mod N) */
     921             : 
     922        1766 :   mpz_init2 (modulus->multiple, Nbits);
     923        1766 :   mpz_set_ui (modulus->temp1, 1UL);
     924        1766 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
     925             :   /* compute ceil(2^bits / N) */
     926        1766 :   mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus);
     927        1766 :   mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus);
     928             :   /* Now multiple is the smallest multiple of N >= 2^bits */
     929             : 
     930        1766 :   mpz_set_ui (modulus->temp1, 1UL);
     931        1766 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
     932             :   /* since we directly check even modulus in ecm/pm1/pp1,
     933             :      N is odd here, thus 1/N mod 2^Nbits always exist */
     934        1766 :   mpz_invert (modulus->temp2, N, modulus->temp1); /* temp2 = 1/N mod B^n */
     935        1766 :   mpz_sub (modulus->temp2, modulus->temp1, modulus->temp2);
     936             :   /* temp2 = -1/N mod B^n */
     937             :   /* ensure Nprim has all its n limbs correctly set, for ecm_redc_n */
     938        8752 :   MPN_ZERO(modulus->Nprim, mpz_size (N));
     939        1766 :   mpn_copyi (modulus->Nprim, PTR(modulus->temp2), ABSIZ(modulus->temp2));
     940        1766 : }
     941             : 
     942             : void 
     943         296 : mpmod_init_REDC (mpmod_t modulus, const mpz_t N)
     944             : {
     945             :   mp_size_t n;
     946             :   int Nbits;
     947             :   
     948         296 :   mpz_init_set (modulus->orig_modulus, N);
     949             :   
     950         296 :   n = mpz_size (N);
     951         296 :   modulus->repr = ECM_MOD_REDC;
     952         296 :   Nbits = n * GMP_NUMB_BITS; /* Number of bits, rounded
     953             :                                 up to full limb */
     954         296 :   modulus->bits = Nbits;
     955             :   
     956         296 :   mpz_init2 (modulus->temp1, 2 * Nbits + GMP_NUMB_BITS);
     957         296 :   mpz_init2 (modulus->temp2, Nbits);
     958         296 :   mpz_init2 (modulus->aux_modulus, Nbits);
     959             : 
     960         296 :   mpz_set_ui (modulus->temp1, 1UL);
     961         296 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
     962             :   /* since we directly check even modulus in ecm/pm1/pp1,
     963             :      N is odd here, thus 1/N mod 2^Nbits always exist */
     964         296 :   mpz_invert (modulus->aux_modulus, N, modulus->temp1);
     965             : 
     966         296 :   mpz_sub (modulus->aux_modulus, modulus->temp1, modulus->aux_modulus);
     967             :   /* ensure aux_modulus has n allocated limbs, for ecm_redc_n */
     968         296 :   if (ABSIZ(modulus->aux_modulus) < n)
     969             :     {
     970           8 :       _mpz_realloc (modulus->aux_modulus, n);
     971             :       /* in case the reallocation fails, _mpz_realloc sets the value to 0 */
     972           8 :       ASSERT_ALWAYS (mpz_cmp_ui (modulus->aux_modulus, 0) != 0);
     973          28 :       MPN_ZERO (PTR(modulus->aux_modulus) + ABSIZ(modulus->aux_modulus),
     974             :                 n - ABSIZ(modulus->aux_modulus));
     975             :     }
     976             : 
     977         296 :   mpz_init2 (modulus->R2, Nbits);
     978         296 :   mpz_set_ui (modulus->temp1, 1UL);
     979         296 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits);
     980         296 :   mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus);
     981             :   /* Now R2 = (2^bits)^2 (mod N) */
     982             :   
     983         296 :   mpz_init2 (modulus->R3, Nbits);
     984         296 :   mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits);
     985         296 :   mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus);
     986             :   /* Now R3 = (2^bits)^3 (mod N) */
     987             :   
     988         296 :   mpz_init (modulus->multiple);
     989         296 :   mpz_set_ui (modulus->temp1, 1UL);
     990         296 :   mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits);
     991             :   /* compute ceil(2^bits / N) */
     992         296 :   mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus);
     993         296 :   mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus);
     994             :   /* Now multiple is the largest multiple of N >= 2^bits */
     995         296 : }
     996             : 
     997             : void 
     998       10132 : mpmod_clear (mpmod_t modulus)
     999             : {
    1000       10132 :   mpz_clear (modulus->orig_modulus);
    1001       10132 :   mpz_clear (modulus->temp1);
    1002       10132 :   mpz_clear (modulus->temp2);
    1003       10132 :   if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ)
    1004        4889 :     mpz_clear (modulus->aux_modulus);
    1005       10132 :   if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1006             :     {
    1007        6617 :       mpz_clear (modulus->R2);
    1008        6617 :       mpz_clear (modulus->R3);
    1009        6617 :       mpz_clear (modulus->multiple);
    1010             :     }
    1011       10132 :   if (modulus->repr == ECM_MOD_MODMULN)
    1012        4969 :     free (modulus->Nprim);
    1013             :   
    1014       10132 :   return;
    1015             : }
    1016             : 
    1017             : /* initialize r and set all entries from those of modulus */
    1018             : void
    1019        7486 : mpmod_init_set (mpmod_t r, const mpmod_t modulus)
    1020             : {
    1021        7486 :   const unsigned long Nbits = abs(modulus->bits);
    1022        7486 :   const unsigned long n = mpz_size (modulus->orig_modulus);
    1023             : 
    1024        7486 :   r->repr = modulus->repr;
    1025        7486 :   r->bits = modulus->bits;
    1026        7486 :   r->Fermat = modulus->Fermat;
    1027        7486 :   mpz_init_set (r->orig_modulus, modulus->orig_modulus);
    1028        7486 :   mpz_init2 (r->temp1, 2 * Nbits + GMP_NUMB_BITS);
    1029        7486 :   mpz_init2 (r->temp2, Nbits + GMP_NUMB_BITS);
    1030        7486 :   if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1031             :     {
    1032        4567 :       mpz_init2 (r->multiple, Nbits);
    1033        4567 :       mpz_init2 (r->R2, Nbits);
    1034        4567 :       mpz_init2  (r->R3, Nbits);
    1035        4567 :       mpz_set (r->multiple, modulus->multiple);
    1036        4567 :       mpz_set (r->R2, modulus->R2);
    1037        4567 :       mpz_set (r->R3, modulus->R3);
    1038             :     }
    1039        7486 :   if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ)
    1040             :     {
    1041        4177 :       mpz_init2 (r->aux_modulus, Nbits);
    1042        4177 :       mpz_set (r->aux_modulus, modulus->aux_modulus);
    1043             :       /* for ECM_MOD_REDC, ensure r->aux_modulus has n limbs */
    1044        4177 :       _mpz_realloc (r->aux_modulus, n);
    1045        4738 :       MPN_ZERO (PTR(r->aux_modulus) + ABSIZ(r->aux_modulus),
    1046             :                 n - ABSIZ(r->aux_modulus));
    1047             :     }
    1048        7486 :   if (modulus->repr == ECM_MOD_MODMULN)
    1049             :     {
    1050        3212 :       r->Nprim = (mp_limb_t*) malloc (n * sizeof (mp_limb_t));
    1051        3212 :       mpn_copyi (r->Nprim, modulus->Nprim, n);
    1052             :     }
    1053        7486 : }
    1054             : 
    1055             : 
    1056             : void 
    1057     2901620 : mpres_init (mpres_t R, const mpmod_t modulus)
    1058             : {
    1059             :   /* use mpz_sizeinbase since modulus->bits may not be initialized yet */
    1060     2901620 :   mpz_init2 (R, mpz_sizeinbase (modulus->orig_modulus, 2) + GMP_NUMB_BITS);
    1061     2901620 : }
    1062             : 
    1063             : /* realloc R so that it has at least the same number of limbs as modulus */
    1064             : void
    1065     1485246 : mpres_realloc (mpres_t R, const mpmod_t modulus)
    1066             : {
    1067     1485246 :   if (modulus->repr == ECM_MOD_MODMULN)
    1068      696604 :     MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1069     1485246 : }
    1070             : 
    1071             : 
    1072             : /* a <- b^2 mod (modulus).
    1073             :    a and b might be equal, but cannot be modulus->temp1.
    1074             :    Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */
    1075             : static inline void
    1076    72933821 : mpres_pow_sqr (mpres_t a, const mpres_t b, mpmod_t modulus)
    1077             : {
    1078             :   /* Set temp2 = temp2*temp2 */
    1079    72933821 :   if (modulus->repr == ECM_MOD_BASE2)
    1080             :     {
    1081     4678530 :       mpz_mul (modulus->temp1, b, b);
    1082     4678530 :       base2mod (a, modulus->temp1, modulus->temp1, modulus);
    1083             :     }
    1084    68255291 :   else if (modulus->repr == ECM_MOD_MODMULN)
    1085    34128499 :     ecm_sqrredc_basecase (a, b, modulus);
    1086             :   else
    1087             :     {
    1088             :       ASSERT(modulus->repr == ECM_MOD_REDC);
    1089    34126792 :       mpz_mul (modulus->temp1, b, b);
    1090    34126792 :       REDC (a, modulus->temp1, modulus->temp2, modulus);
    1091             :     }
    1092    72933821 : }
    1093             : 
    1094             : /* a <- b*c mod (modulus)
    1095             :    a, b, c must not equal modulus->temp1.
    1096             :    Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */
    1097             : static inline void
    1098     9446697 : mpres_pow_mul (mpres_t a, const mpres_t b, const mpres_t c, mpmod_t modulus)
    1099             : {
    1100             :   /* Set temp2 = temp2*temp2 */
    1101     9446697 :   if (modulus->repr == ECM_MOD_BASE2)
    1102             :     {
    1103      459612 :       mpz_mul (modulus->temp1, b, c);
    1104      459612 :       base2mod (a, modulus->temp1, modulus->temp1, modulus);
    1105             :     }
    1106     8987085 :   else if (modulus->repr == ECM_MOD_MODMULN)
    1107     4493679 :     ecm_mulredc_basecase (a, b, c, modulus);
    1108             :   else
    1109             :     {
    1110             :       ASSERT(modulus->repr == ECM_MOD_REDC);
    1111     4493406 :       mpz_mul (modulus->temp1, b, c);
    1112     4493406 :       REDC (a, modulus->temp1, modulus->temp2, modulus);
    1113             :     }
    1114     9446697 : }
    1115             : 
    1116             : /* R <- BASE^EXP mod modulus.
    1117             :    Warning: EXP can be negative (only occurs for P-1 in revision 2982).
    1118             :  */
    1119             : void 
    1120     5344105 : mpres_pow (mpres_t R, const mpres_t BASE, const mpz_t EXP, mpmod_t modulus)
    1121             : {
    1122             :     size_t k, expnbits, K, lw, n0, i;
    1123             :     mpres_t *B;
    1124             :     mp_limb_t w;
    1125             : 
    1126             :     ASSERT_NORMALIZED(BASE);
    1127             : 
    1128     5344105 :   if (modulus->repr == ECM_MOD_MPZ)
    1129     5284533 :     mpz_powm (R, BASE, EXP, modulus->orig_modulus);
    1130       59572 :   else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN ||
    1131       20320 :            modulus->repr == ECM_MOD_REDC)
    1132             :     {
    1133             :       size_t expidx;
    1134             :       mp_limb_t bitmask, expbits;
    1135       59572 :       int sgn = mpz_sgn (EXP);
    1136             :       mpz_t exp;
    1137             : 
    1138             :       /* case EXP=0 */
    1139       59572 :       if (sgn == 0)
    1140             :         {
    1141         105 :           mpres_set_ui (R, 1UL, modulus); /* set result to 1 */
    1142             :           ASSERT_NORMALIZED (R);
    1143         105 :           return;
    1144             :         }
    1145             : 
    1146             :       /* exp <- |EXP| */
    1147       59467 :       PTR(exp) = PTR(EXP);
    1148       59467 :       SIZ(exp) = ABSIZ(EXP);
    1149             : 
    1150       59467 :       expidx = mpz_size (exp) - 1;         /* point at most significant limb */
    1151       59467 :       expbits = mpz_getlimbn (exp, expidx); /* get most significant limb */
    1152             :       ASSERT (expbits != 0);
    1153             : 
    1154             :       /* Scan for the MSB in expbits */
    1155       59467 :       bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1);
    1156     2109731 :       for (; (bitmask & expbits) == 0; bitmask >>= 1);
    1157             :     
    1158             :       /* here the most significant limb with any set bits is in expbits, */
    1159             :       /* bitmask is set to mask in the msb of expbits */
    1160             : 
    1161       59467 :       k = 1; /* sliding window size */
    1162       59467 :       expnbits = mpz_sizeinbase (exp, 2);
    1163             :       /* the average number of multiplications is 2^(k-1) + expnbits / (k+1) */
    1164      291875 :       while ((1 << (k-1)) + expnbits / (k + 1) > (1 << k) + expnbits / (k + 2))
    1165      232408 :         k ++;
    1166             :       /* precompute BASE^i for i = 2, 3, 5, ..., 2^k-1 */
    1167       59467 :       K = 1UL << (k - 1);
    1168       59467 :       B = malloc (K * sizeof (mpres_t));
    1169     1543665 :       for (i = 0; i < K; i++)
    1170             :         {
    1171     1484198 :           mpres_init (B[i], modulus);
    1172     1484198 :           mpres_realloc (B[i], modulus);
    1173     1484198 :           if (i == 0) /* store BASE^2 in B[0] */
    1174       59467 :             mpres_pow_sqr (B[i], BASE, modulus);
    1175     1424731 :           else if (i == 1)
    1176       58955 :             mpres_pow_mul (B[i], B[0], BASE, modulus);
    1177             :           else
    1178     1365776 :             mpres_pow_mul (B[i], B[i-1], B[0], modulus);
    1179             :         }
    1180             :       
    1181       59467 :       mpz_set (modulus->temp2, BASE);
    1182       59467 :       bitmask >>= 1;
    1183       59467 :       w = 0; /* invariant: temp2 has to be multiplied by BASE^w */
    1184       59467 :       lw = 0;   /* number of bits in w */
    1185       59467 :       expnbits --;     /* number of remaining bits to deal with */
    1186       59467 :       n0 = 0;   /* smallest bit index of current window */
    1187             : 
    1188             :       while (1) 
    1189             :         {
    1190    57867979 :           for ( ; bitmask != 0; bitmask >>= 1, expnbits--)
    1191             :             {
    1192             :               /* Set temp2 = temp2 ^ 2 */
    1193    56945245 :               mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus);
    1194    56945245 :               w = w << 1;
    1195    56945245 :               lw += (lw != 0);
    1196             : 
    1197             :               /* If bit is 1, accumulate in w */
    1198    56945245 :               if (expbits & bitmask)
    1199             :                 {
    1200    28487159 :                   w ++;
    1201    28487159 :                   if (lw == 0)
    1202             :                     {
    1203     8021966 :                       lw = 1;
    1204             :                       /* n0 points to the smallest bit of the current window */
    1205     8021966 :                       n0 = (expnbits >= k) ? expnbits - k : 0;
    1206             :                     }
    1207             :                   /* if the current bit is the smallest one of the window,
    1208             :                      we multiply by BASE^w */
    1209    28487159 :                   if (mpz_scan1 (exp, n0) == expnbits - 1)
    1210             :                     {
    1211             :                       ASSERT(w/2 < K);
    1212     8021966 :                       mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2],
    1213     8021966 :                                      modulus->temp2, modulus);
    1214     8021966 :                       w = lw = 0;
    1215             :                     }
    1216             :                 }
    1217             :             }
    1218      922734 :           if (expidx == 0)              /* if we just processed the least */
    1219       59467 :             break;                      /* significant limb, we are done */
    1220      863267 :           expidx --;
    1221      863267 :           expbits = mpz_getlimbn (exp, expidx);
    1222      863267 :           bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
    1223             :         }
    1224             :       ASSERT(w/2 < K);
    1225       59467 :       if (w != 0)
    1226           0 :         mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2],
    1227           0 :                        modulus->temp2, modulus);
    1228     1543665 :       for (i = 0; i < K; i++)
    1229     1484198 :         mpres_clear (B[i], modulus);
    1230       59467 :       free (B);
    1231       59467 :       mpz_set (R, modulus->temp2);
    1232             : 
    1233             :       /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|.
    1234             :          If EXP was negative, do a modular inverse */
    1235       59467 :       if (sgn < 0)
    1236         202 :         mpres_invert (R, R, modulus);
    1237             :     } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */
    1238             :   ASSERT_NORMALIZED (R);
    1239             : }
    1240             : 
    1241             : 
    1242             : /* Returns 1 if S == 0 (mod modulus), 0 otherwise */
    1243             : 
    1244             : int
    1245   340787144 : mpres_is_zero (const mpres_t S, mpmod_t modulus)
    1246             : {
    1247   340787144 :   mpz_mod (modulus->temp1, S, modulus->orig_modulus);
    1248             :   /* For all currently implemented representations, a zero residue has zero
    1249             :      integer representation */
    1250   340787144 :   return (mpz_sgn (modulus->temp1) == 0) ? 1 : 0;
    1251             : }
    1252             : 
    1253             : /* R <- BASE^EXP mod modulus */ 
    1254             : void 
    1255         247 : mpres_ui_pow (mpres_t R, const unsigned long BASE, const mpres_t EXP, 
    1256             :               mpmod_t modulus)
    1257             : {
    1258         247 :   if (modulus->repr == ECM_MOD_MPZ)
    1259             :     {
    1260         135 :       mpz_set_ui (modulus->temp1, BASE);
    1261         135 :       mpz_powm (R, modulus->temp1, EXP, modulus->orig_modulus);
    1262             :     }
    1263         112 :   else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN ||
    1264          48 :            modulus->repr == ECM_MOD_REDC)
    1265             :     {
    1266             :       size_t expidx;
    1267             :       mp_limb_t bitmask, expbits;
    1268             : 
    1269         112 :       expidx = mpz_size (EXP) - 1;         /* point at most significant limb */
    1270         112 :       expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */
    1271         112 :       bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
    1272             : 
    1273             :       /* case EXP=0 */
    1274         112 :       if (mpz_sgn (EXP) == 0)
    1275             :         {
    1276           0 :           mpres_set_ui (R, 1UL, modulus); /* set result to 1 */
    1277             :           ASSERT_NORMALIZED (R);
    1278           0 :           return;
    1279             :         }
    1280             : 
    1281             :       ASSERT (mpz_size (EXP) > 0);         /* probably redundant with _sgn() test */
    1282         112 :       expidx = mpz_size (EXP) - 1;         /* point at most significant limb */
    1283         112 :       expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */
    1284             :       ASSERT (expbits != 0);
    1285             : 
    1286             :       /* Scan for the MSB in expbits */
    1287         112 :       bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1);
    1288        3627 :       for (; (bitmask & expbits) == 0; bitmask >>= 1);
    1289             :     
    1290             :       /* here the most significant limb with any set bits is in expbits, */
    1291             :       /* bitmask is set to mask in the msb of expbits */
    1292             : 
    1293         112 :       mpz_set_ui (modulus->temp2, BASE); /* temp2 = BASE */
    1294         112 :       if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1295             :         {
    1296          96 :           mpz_mul_2exp (modulus->temp1, modulus->temp2, modulus->bits);
    1297          96 :           mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus);
    1298             :         }
    1299         112 :       bitmask >>= 1;
    1300             : 
    1301             : 
    1302             :       while (1) 
    1303             :         {
    1304    16178058 :           for ( ; bitmask != 0; bitmask >>= 1) 
    1305             :             {
    1306    15929109 :               mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus);
    1307             : 
    1308             :               /* If bit is 1, set temp2 = temp2 * BASE */
    1309    15929109 :               if (expbits & bitmask)
    1310             :                 {
    1311     7955730 :                   if (BASE == 2UL)
    1312             :                     {
    1313          26 :                       mpz_mul_2exp (modulus->temp2, modulus->temp2, 1);
    1314          26 :                       if (mpz_cmp (modulus->temp2, modulus->orig_modulus) >= 0)
    1315          12 :                         mpz_sub (modulus->temp2, modulus->temp2, modulus->orig_modulus);
    1316             :                     }
    1317             :                   else
    1318             :                     {
    1319     7955704 :                       mpz_mul_ui (modulus->temp1, modulus->temp2, BASE);
    1320     7955704 :                       mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus);
    1321             :                     }
    1322             :                 }
    1323             :             }
    1324      248949 :           if (expidx == 0)              /* if we just processed the least */
    1325         112 :             break;                      /* significant limb, we are done */
    1326      248837 :           expidx--;
    1327      248837 :           expbits = mpz_getlimbn (EXP, expidx);
    1328      248837 :           bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
    1329             :         }
    1330         112 :       mpz_set (R, modulus->temp2);
    1331             : 
    1332             :       /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|.
    1333             :          If EXP was negative, do a modular inverse */
    1334         112 :       if (mpz_sgn (EXP) < 0)
    1335             :         {
    1336           0 :           mpres_invert (R, R, modulus);
    1337             :         }
    1338             :     } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */
    1339             :   ASSERT_NORMALIZED (R);
    1340             : }
    1341             : 
    1342             : /* We use here the algorithm described in "Fast Modular Reduction" from
    1343             :    Hasenplaugh, Gaubatz and Gobal, Arith'18, 2007: assuming N has n limbs,
    1344             :    we have precomputed C = B^(n + ceil(n/2)) mod N. */
    1345             : static void
    1346   351006637 : mpres_mpz_mod (mpres_t R, mpz_t T, mpz_t N, mpz_t C)
    1347             : {
    1348   351006637 :   size_t n = mpz_size (N);
    1349   351006637 :   size_t t = mpz_size (T);
    1350   351006637 :   size_t m = n + (n + 1) / 2; /* n + ceil(n/2) */
    1351             : 
    1352   351006637 :   if (t > m && n > 1) /* if n=1, then m=2, thus h=0 */
    1353             :     {
    1354    87988003 :       size_t c = mpz_size (C);
    1355             :       size_t h, l;
    1356             :       mp_ptr rp;
    1357    87988003 :       mp_ptr tp = PTR(T);
    1358             : 
    1359             :       /* Warning: we might have t > 2n. In that case we reduce
    1360             :          {tp+l+m, t-(m+l)} where l = t-2n. */
    1361    87988003 :       l = (t > 2 * n) ? t - 2 * n : 0;
    1362             : 
    1363    87988003 :       tp += l;
    1364    87988003 :       h = t - (m + l); /* since t-l <= 2n and m = n + ceil(n/2),
    1365             :                           we have h <= n - ceil(n/2) = floor(n/2).
    1366             :                           On the other hand, if l=0 we have h = t-m > 0;
    1367             :                           if l>0, then l=t-2n, thus h=2n-m = floor(n/2) > 0
    1368             :                           since n > 1. */
    1369    87988003 :       mpz_realloc (R, c + h);
    1370    87988003 :       rp = PTR(R);
    1371    87988003 :       if (c > h)
    1372    87417036 :         mpn_mul (rp, PTR(C), c, tp + m, h);
    1373             :       else
    1374      570967 :         mpn_mul (rp, tp + m, h, PTR(C), c);
    1375             :       /* now add {rp, c+h} to {tp, m}: we have c <= n and h <= n/2,
    1376             :          thus c + h <= m */
    1377    87988003 :       if (c + h > m) abort();
    1378    87988003 :       tp[m] = mpn_add (tp, tp, m, rp, c + h);
    1379    87988003 :       m += l + tp[m];
    1380    87988003 :       tp -= l; /* put back the low l limbs */
    1381    87988003 :       MPN_NORMALIZE(tp, m);
    1382    87988003 :       SIZ(T) = (SIZ(T) > 0) ? m : -m;
    1383             :     }
    1384             :   
    1385   351006637 :   mpz_mod (R, T, N);
    1386   351006637 : }
    1387             : 
    1388             : void 
    1389  6726466993 : mpres_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    1390             : {
    1391             :   ASSERT_NORMALIZED (S1);
    1392             :   ASSERT_NORMALIZED (S2);
    1393             : 
    1394             : #ifdef WANT_ASSERT_EXPENSIVE
    1395             :   mpz_t test1, test2, test_result1, test_result2;
    1396             :   ASSERT_ALWAYS (S1 != modulus->temp1 && S2 != modulus->temp1 &&
    1397             :                  R != modulus->temp1);
    1398             :   mpz_init (test1);
    1399             :   mpz_init (test2);
    1400             :   mpz_init (test_result1);
    1401             :   mpz_init (test_result2);
    1402             :   mpres_get_z (test1, S1, modulus);
    1403             :   mpres_get_z (test2, S2, modulus);
    1404             :   mpz_mul (test_result1, test1, test2);
    1405             :   mpz_mod (test_result1, test_result1, modulus->orig_modulus);
    1406             : #endif
    1407             : 
    1408  6726466993 :   if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768))
    1409             :     {
    1410       40743 :       mp_size_t n = modulus->Fermat / GMP_NUMB_BITS;
    1411             :       unsigned long k;
    1412             :       mp_srcptr s1p, s2p;
    1413             :       mp_size_t s1s, s2s;
    1414             :       
    1415       40743 :       MPZ_REALLOC (R, n + 1);
    1416       40743 :       s1p = PTR(S1);
    1417       40743 :       s1s = SIZ(S1);
    1418       40743 :       s2p = PTR(S2);
    1419       40743 :       s2s = SIZ(S2);
    1420             :       
    1421       40743 :       k = mpn_fft_best_k (n, S1 == S2);
    1422             :       ASSERT(mpn_fft_next_size (n, k) == n);
    1423             : 
    1424       40743 :       if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus))
    1425             :         {
    1426        6914 :           s1p = PTR(modulus->temp1);
    1427        6914 :           s1s = SIZ(modulus->temp1);
    1428             :         }
    1429       40743 :       if (S1 == S2)
    1430             :         {
    1431       13507 :           s2p = s1p;
    1432       13507 :           s2s = s1s;
    1433             :         }
    1434       27236 :       else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus))
    1435             :         {
    1436        3651 :           s2p = PTR(modulus->temp2);
    1437        3651 :           s2s = SIZ(modulus->temp2);
    1438             :         }
    1439             : 
    1440             :       /* mpn_mul_fft() computes the product modulo B^n + 1, where 
    1441             :          B = 2^(machine word size in bits). So the result can be = B^n, 
    1442             :          in that case R is set to zero and 1 is returned as carry-out.
    1443             :          In all other cases 0 is returned. Hence the complete result is 
    1444             :          R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */
    1445       40743 :       PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k);
    1446       40743 :       n ++;
    1447       97294 :       MPN_NORMALIZE(PTR(R), n);
    1448       40743 :       SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n;
    1449             : 
    1450       40743 :       return;
    1451             :     }
    1452             : 
    1453  6726426250 :   switch (modulus->repr)
    1454             :     {
    1455    17069513 :     case ECM_MOD_BASE2:
    1456    17069513 :       mpz_mul (modulus->temp1, S1, S2);
    1457    17069513 :       base2mod (R, modulus->temp1, modulus->temp1, modulus);
    1458    17069513 :       break;
    1459  6234295496 :     case ECM_MOD_MODMULN:
    1460  6234295496 :       MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1461  6234295496 :       ecm_mulredc_basecase (R, S1, S2, modulus);
    1462  6234295496 :       break;
    1463   231968646 :     case ECM_MOD_REDC:
    1464   231968646 :       mpz_mul (modulus->temp1, S1, S2);
    1465   231968646 :       REDC (R, modulus->temp1, modulus->temp2, modulus);
    1466   231968646 :       break;
    1467   243092595 :     default: /* case ECM_MOD_MPZ */
    1468   243092595 :       mpz_mul (modulus->temp1, S1, S2);
    1469   243092595 :       mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus,
    1470   243092595 :                      modulus->aux_modulus);
    1471   243092595 :       break;
    1472             :     }
    1473             :   ASSERT_NORMALIZED (R);
    1474             : 
    1475             : #ifdef WANT_ASSERT_EXPENSIVE
    1476             :   mpres_get_z (test_result2, R, modulus);
    1477             :   if (mpz_cmp (test_result1, test_result2) != 0)
    1478             :     {
    1479             :       printf ("mpres_mul and mpz_mul/mpz_mod produced different results.\n");
    1480             :       gmp_printf ("input 1:         %Zd\n", test1);
    1481             :       gmp_printf ("input 2:         %Zd\n", test2);
    1482             :       gmp_printf ("mpres_mul:       %Zd\n", test_result2);
    1483             :       gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1);
    1484             :       abort ();
    1485             :     }
    1486             :   mpz_clear (test1);
    1487             :   mpz_clear (test2);
    1488             :   mpz_clear (test_result1);
    1489             :   mpz_clear (test_result2);
    1490             : #endif
    1491             : }
    1492             : 
    1493             : /* R <- S1^2 mod modulus */
    1494             : void 
    1495  1632909584 : mpres_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus)
    1496             : {
    1497             :   ASSERT_NORMALIZED (S1);
    1498             : 
    1499             : #ifdef WANT_ASSERT_EXPENSIVE
    1500             :   mpz_t test1, test_result1, test_result2;
    1501             :   ASSERT_ALWAYS (S1 != modulus->temp1 && R != modulus->temp1);
    1502             :   mpz_init (test1);
    1503             :   mpz_init (test_result1);
    1504             :   mpz_init (test_result2);
    1505             :   mpres_get_z (test1, S1, modulus);
    1506             :   mpz_mul (test_result1, test1, test1);
    1507             :   mpz_mod (test_result1, test_result1, modulus->orig_modulus);
    1508             : #endif
    1509             : 
    1510  1632909584 :   if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768))
    1511             :     {
    1512       13507 :       mpres_mul (R, S1, S1, modulus);
    1513       13507 :       return;
    1514             :     }
    1515             : 
    1516  1632896077 :   switch (modulus->repr)
    1517             :     {
    1518     7435109 :     case ECM_MOD_BASE2:
    1519     7435109 :       mpz_mul (modulus->temp1, S1, S1);
    1520     7435109 :       base2mod (R, modulus->temp1, modulus->temp1, modulus);
    1521     7435109 :       break;
    1522  1409692627 :     case ECM_MOD_MODMULN:
    1523  1409692627 :       MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1524  1409692627 :       ecm_sqrredc_basecase (R, S1, modulus);
    1525  1409692627 :       break;
    1526   107854299 :     case ECM_MOD_REDC:
    1527   107854299 :       mpz_mul (modulus->temp1, S1, S1);
    1528   107854299 :       REDC (R, modulus->temp1, modulus->temp2, modulus);
    1529   107854299 :       break;
    1530   107914042 :     default: /* case ECM_MOD_MPZ */
    1531   107914042 :       mpz_mul (modulus->temp1, S1, S1);
    1532   107914042 :       mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus,
    1533   107914042 :                      modulus->aux_modulus);
    1534   107914042 :       break;
    1535             :     }
    1536             :   ASSERT_NORMALIZED (R);
    1537             : 
    1538             : #ifdef WANT_ASSERT_EXPENSIVE
    1539             :   mpres_get_z (test_result2, R, modulus);
    1540             :   if (mpz_cmp (test_result1, test_result2) != 0)
    1541             :     {
    1542             :       printf ("mpres_sqr and mpz_mul/mpz_mod produced different results.\n");
    1543             :       gmp_printf ("input 1:         %Zd\n", test1);
    1544             :       gmp_printf ("mpres_mul:       %Zd\n", test_result2);
    1545             :       gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1);
    1546             :       abort ();
    1547             :     }
    1548             :   mpz_clear (test1);
    1549             :   mpz_clear (test_result1);
    1550             :   mpz_clear (test_result2);
    1551             : #endif
    1552             : }
    1553             : 
    1554             : /* R <- S * n mod modulus */
    1555             : void 
    1556     9715225 : mpres_mul_ui (mpres_t R, const mpres_t S, const unsigned long n, 
    1557             :               mpmod_t modulus)
    1558             : {
    1559             :   ASSERT_NORMALIZED (S);
    1560     9715225 :   mpz_mul_ui (modulus->temp1, S, n);
    1561             :   /* This is the same for all methods: just reduce with original modulus */
    1562     9715225 :   mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1563             :   ASSERT_NORMALIZED (R);
    1564     9715225 : }
    1565             : 
    1566             : /* R <- S * 2^k mod modulus */
    1567             : void 
    1568         152 : mpres_mul_2exp (mpres_t R, const mpres_t S, const unsigned long k, 
    1569             :               mpmod_t modulus)
    1570             : {
    1571             :   ASSERT_NORMALIZED (S);
    1572         152 :   mpz_mul_2exp (modulus->temp1, S, k);
    1573             :   /* This is the same for all methods: just reduce with original modulus */
    1574         152 :   mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1575             :   ASSERT_NORMALIZED (R);
    1576         152 : }
    1577             : 
    1578             : /* This function multiplies an integer in mpres_t form with an integer in
    1579             :    mpz_t form, and stores the output in mpz_t form. The advantage is that
    1580             :    one REDC suffices to reduce the product and convert it to non-Montgomery
    1581             :    representation. */
    1582             : 
    1583             : void 
    1584    12896712 : mpres_mul_z_to_z (mpz_t R, const mpres_t S1, const mpz_t S2, mpmod_t modulus)
    1585             : {
    1586             :   ASSERT_NORMALIZED (S1);
    1587             : 
    1588    12896712 :   if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)
    1589             :     {
    1590          37 :       mp_size_t n = modulus->Fermat / GMP_NUMB_BITS;
    1591             :       unsigned long k;
    1592          37 :       mp_srcptr s1p = PTR(S1), s2p = PTR(S2);
    1593          37 :       mp_size_t s1s = SIZ(S1), s2s = SIZ(S2);
    1594             : 
    1595          37 :       MPZ_REALLOC (R, n + 1);
    1596          37 :       k = mpn_fft_best_k (n, S1 == S2);
    1597             :       ASSERT(mpn_fft_next_size (n, k) == n);
    1598             : 
    1599          37 :       if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus))
    1600             :         {
    1601           0 :           s1p = PTR(modulus->temp1);
    1602           0 :           s1s = SIZ(modulus->temp1);
    1603             :         }
    1604          37 :       if (S1 == S2)
    1605             :         {
    1606           0 :           s2p = s1p;
    1607           0 :           s2s = s1s;
    1608             :         }
    1609          37 :       else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus))
    1610             :         {
    1611           0 :           s2p = PTR(modulus->temp2);
    1612           0 :           s2s = SIZ(modulus->temp2);
    1613             :         }
    1614             : 
    1615             :       /* mpn_mul_fft() computes the product modulo B^n + 1, where 
    1616             :          B = 2^(machine word size in bits). So the result can be = B^n, 
    1617             :          in that case R is set to zero and 1 is returned as carry-out.
    1618             :          In all other cases 0 is returned. Hence the complete result is 
    1619             :          R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */
    1620          37 :       PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k);
    1621          37 :       n ++;
    1622        1607 :       MPN_NORMALIZE(PTR(R), n);
    1623          37 :       SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n;
    1624          37 :       mpz_mod (R, R, modulus->orig_modulus);
    1625             : 
    1626          37 :       return;
    1627             :     }
    1628             : 
    1629    12896675 :   switch (modulus->repr)
    1630             :     {
    1631       23965 :     case ECM_MOD_BASE2:
    1632       23965 :       if (mpz_sizeinbase (S2, 2) > (unsigned) abs (modulus->bits))
    1633             :         {
    1634        1890 :           base2mod (modulus->temp2, S2, modulus->temp1, modulus);
    1635        1890 :           mpz_mul (modulus->temp1, S1, modulus->temp2);
    1636             :         }
    1637             :       else
    1638       22075 :         mpz_mul (modulus->temp1, S1, S2);
    1639       23965 :       base2mod (R, modulus->temp1, modulus->temp1, modulus);
    1640       23965 :       mpz_mod (R, R, modulus->orig_modulus);
    1641       23965 :       break;
    1642     9126249 :     case ECM_MOD_MODMULN:
    1643     9126249 :       if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
    1644             :         {
    1645       34576 :           mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
    1646       34576 :           MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1647       34576 :           ecm_mulredc_basecase (R, S1, modulus->temp2, modulus);
    1648       34576 :           mpz_mod (R, R, modulus->orig_modulus);
    1649             :         }
    1650             :       else
    1651             :         {
    1652     9091673 :           MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1653     9091673 :           if (ABSIZ(S2) < modulus->bits / GMP_NUMB_BITS)
    1654             :             {
    1655             :               mpz_t t;
    1656      384861 :               mpz_init2 (t, modulus->bits);
    1657      384861 :               mpz_set (t, S2);
    1658      384861 :               ecm_mulredc_basecase (R, S1, t, modulus);
    1659      384861 :               mpz_clear (t);
    1660             :             }
    1661             :           else
    1662     8706812 :             ecm_mulredc_basecase (R, S1, S2, modulus);
    1663     9091673 :           mpz_mod (R, R, modulus->orig_modulus);
    1664             :         }
    1665     9126249 :       break;
    1666     1024371 :     case ECM_MOD_REDC:
    1667     1024371 :       if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
    1668             :         {
    1669        2220 :           mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
    1670        2220 :           mpz_mul (modulus->temp1, S1, modulus->temp2);
    1671             :         }
    1672             :       else
    1673     1022151 :         mpz_mul (modulus->temp1, S1, S2);
    1674     1024371 :       REDC (R, modulus->temp1, modulus->temp2, modulus);
    1675     1024371 :       mpz_mod (R, R, modulus->orig_modulus);
    1676     1024371 :       break;
    1677     2722090 :     default:
    1678     2722090 :       if (mpz_cmp (S2, modulus->orig_modulus) >= 0)
    1679             :         {
    1680       73585 :           mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
    1681       73585 :           mpz_mul (modulus->temp1, S1, modulus->temp2);
    1682             :         }
    1683             :       else
    1684     2648505 :         mpz_mul (modulus->temp1, S1, S2);
    1685     2722090 :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1686     2722090 :       break;
    1687             :     }
    1688             :   ASSERT_NORMALIZED (R);
    1689             : }
    1690             : 
    1691             : 
    1692             : /* Sets R = S * c, for some constant c that is coprime to modulus.
    1693             :    This is primarily useful for multiplying numbers together for a gcd with
    1694             :    modulus. The advantage is that we don't need to convert the mpz_t 
    1695             :    to Montgomery representation before applying REDC. */
    1696             : 
    1697             : void 
    1698     6385993 : mpres_set_z_for_gcd (mpres_t R, const mpz_t S, mpmod_t modulus)
    1699             : {
    1700     6385993 :   mpz_mod (R, S, modulus->orig_modulus);
    1701             :   ASSERT_NORMALIZED (R);  
    1702     6385993 : }
    1703             : 
    1704             : /* Companion function to mpres_set_z_for_gcd(). It divides by c^n. 
    1705             :    In case of MULREDC with k b-bit words, c = 1/2^(b*k), so we multiply 
    1706             :    by 2^(n*b*k). The purpose is to fix products of n terms collected by 
    1707             :    using mpres_set_z_for_gcd(), so that we can still get the exact residue. */
    1708             : 
    1709             : void 
    1710         596 : mpres_set_z_for_gcd_fix (mpres_t R, const mpres_t S, const mpz_t n, mpmod_t modulus)
    1711             : {
    1712         596 :   switch (modulus->repr)
    1713             :     {
    1714         372 :       case ECM_MOD_MODMULN:
    1715             :       case ECM_MOD_REDC:
    1716             :         {
    1717             :           mpres_t po2;
    1718             :           mpz_t nb;
    1719             : 
    1720         372 :           mpz_init (nb);
    1721         372 :           mpres_init (po2, modulus);
    1722             : 
    1723             :           /* Note: modulus->bits is always non-negative for MODMULN and REDC */
    1724         372 :           mpz_mul_ui (nb, n, modulus->bits);
    1725         372 :           mpres_set_ui (po2, 2, modulus);
    1726         372 :           mpres_pow (po2, po2, nb, modulus);
    1727         372 :           mpres_mul (R, S, po2, modulus);
    1728             : 
    1729         372 :           mpz_clear (nb);
    1730         372 :           mpres_clear (po2, modulus);
    1731             :         }
    1732             :     }
    1733         596 : }
    1734             : 
    1735             : 
    1736             : /* R <- S / 2^n mod modulus. Does not need to be fast. */
    1737             : void 
    1738       12851 : mpres_div_2exp (mpres_t R, const mpres_t S, const unsigned int n, 
    1739             :                 mpmod_t modulus)
    1740             : {
    1741             :   int i;
    1742             :   ASSERT_NORMALIZED (S);
    1743             : 
    1744       12851 :   if (n == 0)
    1745             :     {
    1746           0 :       mpres_set (R, S, modulus);
    1747             :       ASSERT_NORMALIZED (R);
    1748           0 :       return;
    1749             :     }
    1750             : 
    1751       12851 :     if (mpz_odd_p (S))
    1752             :       {
    1753             :         ASSERT (mpz_odd_p (modulus->orig_modulus));
    1754        4590 :         mpz_add (R, S, modulus->orig_modulus);
    1755        4590 :         mpz_tdiv_q_2exp (R, R, 1);
    1756             :       }
    1757             :     else
    1758        8261 :       mpz_tdiv_q_2exp (R, S, 1);
    1759             : 
    1760       20426 :     for (i = n ; i > 1; i--)
    1761             :       {
    1762        7575 :         if (mpz_odd_p (R))
    1763             :           {
    1764             :             ASSERT (mpz_odd_p (modulus->orig_modulus));
    1765        2298 :             mpz_add (R, R, modulus->orig_modulus);
    1766             :           }
    1767        7575 :         mpz_tdiv_q_2exp (R, R, 1);
    1768             :       }
    1769             : 
    1770             :     ASSERT_NORMALIZED (R);
    1771             : }
    1772             : 
    1773             : void
    1774        2560 : mpres_add_ui (mpres_t R, const mpres_t S, const unsigned long n, 
    1775             :               mpmod_t modulus)
    1776             : {
    1777             :   ASSERT_NORMALIZED (S);
    1778        2560 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1779             :     {
    1780         569 :       mpz_add_ui (R, S, n);
    1781         569 :       if (mpz_cmp (R, modulus->orig_modulus) > 0)
    1782          38 :         mpz_sub (R, R, modulus->orig_modulus); /* This assumes modulus >= n */
    1783             :     }
    1784        1991 :   else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1785             :     {
    1786        1991 :       mpz_set_ui (modulus->temp1, n);
    1787        1991 :       mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
    1788        1991 :       mpz_add (modulus->temp1, modulus->temp1, S);
    1789        1991 :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1790             :     }
    1791             :   ASSERT_NORMALIZED (R);
    1792        2560 : }
    1793             : 
    1794             : /* R <- S1 + S2 mod modulus */
    1795             : void 
    1796  1549778592 : mpres_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    1797             : {
    1798             :   ASSERT_NORMALIZED (S1);
    1799             :   ASSERT_NORMALIZED (S2);
    1800  1549778592 :   mpz_add (R, S1, S2);
    1801  1549778592 :   if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) &&
    1802  1385479182 :       ABSIZ(R) > ABSIZ(modulus->orig_modulus))
    1803             :     {
    1804    28701423 :       if (SIZ(R) > 0)
    1805    24994613 :         mpz_sub (R, R, modulus->multiple);
    1806             :       else
    1807     3706810 :         mpz_add (R, R, modulus->multiple);
    1808             :       /* N <= since multiple < 2^Nbits + N, now |R| < B */
    1809             :     }
    1810             :   ASSERT_NORMALIZED (R);
    1811  1549778592 : }
    1812             : 
    1813             : /* R <- S - n mod modulus
    1814             :    If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to
    1815             :    Montgomery representation before substracting
    1816             : */
    1817             : void
    1818   535040529 : mpres_sub_ui (mpres_t R, const mpres_t S, const unsigned long n, 
    1819             :               mpmod_t modulus)
    1820             : {
    1821             :   ASSERT_NORMALIZED (S);
    1822   535040529 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1823             :     {
    1824     2056727 :       mpz_sub_ui (R, S, n);
    1825     2056727 :       if (mpz_sgn (R) < 0)
    1826         168 :         mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */
    1827             :     }
    1828   532983802 :   else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1829             :     {
    1830   532983802 :       mpz_set_ui (modulus->temp1, n);
    1831   532983802 :       mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
    1832   532983802 :       mpz_sub (modulus->temp1, S, modulus->temp1);
    1833   532983802 :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1834             :     }
    1835             :   ASSERT_NORMALIZED (R);
    1836   535040529 : }
    1837             : 
    1838             : #if 0
    1839             : /* R <- n - S mod modulus
    1840             :    If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to
    1841             :    Montgomery representation before substracting
    1842             : */
    1843             : void
    1844             : mpres_ui_sub (mpres_t R, const unsigned long n ,const mpres_t S, 
    1845             :               mpmod_t modulus)
    1846             : {
    1847             :   ASSERT_NORMALIZED (S);
    1848             :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1849             :     {
    1850             :       mpz_ui_sub (R, n, S);
    1851             :       if (mpz_sgn (R) < 0)
    1852             :         mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */
    1853             :     }
    1854             :   else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1855             :     {
    1856             :       mpz_set_ui (modulus->temp1, n);
    1857             :       mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
    1858             :       mpz_sub (modulus->temp1, modulus->temp1, S);
    1859             :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1860             :     }
    1861             :   ASSERT_NORMALIZED (R);
    1862             : }
    1863             : #endif
    1864             : 
    1865             : /* R <- S1 - S2 mod modulus */
    1866             : void 
    1867  6255342193 : mpres_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    1868             : {
    1869             :   ASSERT_NORMALIZED (S1);
    1870             :   ASSERT_NORMALIZED (S2);
    1871  6255342193 :   mpz_sub (R, S1, S2);
    1872  6255342193 :   if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) &&
    1873  6062129372 :       ABSIZ(R) > ABSIZ(modulus->orig_modulus))
    1874             :     {
    1875    87388492 :       if (SIZ(R) > 0)
    1876    44990659 :         mpz_sub (R, R, modulus->multiple);
    1877             :       else
    1878    42397833 :         mpz_add (R, R, modulus->multiple);
    1879             :       /* N <= since multiple < 2^Nbits + N, now |R| < B */
    1880             :     }
    1881             :   ASSERT_NORMALIZED (R);
    1882  6255342193 : }
    1883             : 
    1884             : void 
    1885       11974 : mpres_set_z (mpres_t R, const mpz_t S, mpmod_t modulus)
    1886             : {
    1887       11974 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1888        2282 :     mpz_mod (R, S, modulus->orig_modulus);
    1889        9692 :   else if (modulus->repr == ECM_MOD_MODMULN)
    1890             :     {
    1891        8594 :       mpz_mod (modulus->temp2, S, modulus->orig_modulus);
    1892        8594 :       ecm_mulredc_basecase (R, modulus->temp2, modulus->R2, modulus);
    1893             :     }
    1894        1098 :   else if (modulus->repr == ECM_MOD_REDC)
    1895             :     {
    1896        1098 :       mpz_mod (modulus->temp2, S, modulus->orig_modulus);
    1897        1098 :       mpz_mul (modulus->temp1, modulus->temp2, modulus->R2);
    1898        1098 :       REDC (R, modulus->temp1, modulus->temp2, modulus);
    1899             :     }
    1900             :   ASSERT_NORMALIZED (R);
    1901       11974 : }
    1902             : 
    1903             : /* R and S must not be modulus->temp1 */
    1904             : void 
    1905    16723361 : mpres_get_z (mpz_t R, const mpres_t S, mpmod_t modulus)
    1906             : {
    1907             :   ASSERT_NORMALIZED (S);
    1908    16723361 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1909             :     {
    1910      990449 :       mpz_mod (R, S, modulus->orig_modulus);
    1911             :     }
    1912    15732912 :   else if (modulus->repr == ECM_MOD_MODMULN)
    1913             :     {
    1914    15346253 :       mpz_set (modulus->temp1, S);
    1915    15346253 :       MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    1916    15346253 :       ecm_redc_basecase (R, modulus->temp1, modulus);
    1917    15346253 :       mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */
    1918             :     }
    1919      386659 :   else if (modulus->repr == ECM_MOD_REDC)
    1920             :     {
    1921      386659 :       REDC (R, S, modulus->temp1, modulus);
    1922      386659 :       mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */
    1923             :     }
    1924             : #ifdef DEBUG
    1925             :   else
    1926             :     {
    1927             :       fprintf (ECM_STDERR, "mpres_get_z: Unexpected representation %d\n", 
    1928             :                modulus->repr);
    1929             :       exit (EXIT_FAILURE);
    1930             :     }
    1931             : #endif
    1932    16723361 : }
    1933             : 
    1934             : /* R <- n mod modulus
    1935             :    If repr==ECM_MOD_MPZ or ECM_MOD_BASE2, we convert n to
    1936             :    Montgomery representation
    1937             :  */
    1938             : void 
    1939      287759 : mpres_set_ui (mpres_t R, const unsigned long n, mpmod_t modulus)
    1940             : {
    1941      287759 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1942             :     {
    1943       34058 :       mpz_set_ui (R, n);
    1944       34058 :       mpz_mod (R, R, modulus->orig_modulus);
    1945             :     }
    1946      253701 :   else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1947             :     {
    1948      253701 :       mpz_set_ui (modulus->temp1, n);
    1949      253701 :       mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
    1950      253701 :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1951             :     }
    1952             :   ASSERT_NORMALIZED (R);
    1953      287759 : }
    1954             : 
    1955             : /* same as previous but with signed long */
    1956             : void 
    1957          86 : mpres_set_si (mpres_t R, const long n, mpmod_t modulus)
    1958             : {
    1959          86 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    1960             :     {
    1961           3 :       mpz_set_si (R, n);
    1962           3 :       mpz_mod (R, R, modulus->orig_modulus);
    1963             :     }
    1964          83 :   else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC)
    1965             :     {
    1966          83 :       mpz_set_si (modulus->temp1, n);
    1967          83 :       mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits);
    1968          83 :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    1969             :     }
    1970             :   ASSERT_NORMALIZED (R);
    1971          86 : }
    1972             : 
    1973             : /* R <- -S mod modulus. Does not need to be efficient. */
    1974             : void
    1975     1038623 : mpres_neg (mpres_t R, const mpres_t S, ATTRIBUTE_UNUSED mpmod_t modulus)
    1976             : {
    1977             :   ASSERT_NORMALIZED (S);
    1978     1038623 :   mpz_neg (R, S);
    1979             :   ASSERT_NORMALIZED (R);
    1980     1038623 : }
    1981             : 
    1982             : /* Returns non-zero if inversion succeeded, and zero if not */
    1983             : 
    1984             : int 
    1985     2779612 : mpres_invert (mpres_t R, const mpres_t S, mpmod_t modulus)
    1986             : {
    1987             : #ifdef WANT_ASSERT_EXPENSIVE
    1988             :   mpres_t test;
    1989             :   mpz_t test_result;
    1990             :   mpres_init (test, modulus);
    1991             :   mpres_set (test, S, modulus);
    1992             : #endif
    1993             : 
    1994             :   ASSERT_NORMALIZED (S);
    1995             : 
    1996     2779612 :   if (mpz_invert (modulus->temp2, S, modulus->orig_modulus) == 0)
    1997         409 :     return 0;
    1998             :   
    1999     2779203 :   if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2)
    2000             :     {
    2001      284724 :       mpz_set (R, modulus->temp2);
    2002      284724 :       ASSERT_NORMALIZED (R);
    2003             :     }
    2004     2494479 :   else if (modulus->repr == ECM_MOD_MODMULN)
    2005             :     {
    2006     2227086 :       ecm_mulredc_basecase (R, modulus->temp2, modulus->R3, modulus);
    2007             :       ASSERT_NORMALIZED (R);
    2008             :     }
    2009      267393 :   else if (modulus->repr == ECM_MOD_REDC)
    2010             :     {
    2011             :       MPZ_NORMALIZED (S);
    2012      267393 :       mpz_mul (modulus->temp1, modulus->temp2, modulus->R3);
    2013      267393 :       REDC (R, modulus->temp1, modulus->temp2, modulus);
    2014             :       ASSERT_NORMALIZED (R);
    2015             :     }
    2016             : #ifdef DEBUG
    2017             :   else
    2018             :     {
    2019             :       fprintf (ECM_STDERR, "mpres_invert: Unexpected representation %d\n", 
    2020             :                modulus->repr);
    2021             :       exit (EXIT_FAILURE);
    2022             :     }
    2023             : #endif
    2024             : 
    2025             : #ifdef WANT_ASSERT_EXPENSIVE
    2026             :   mpres_mul (test, test, R, modulus);
    2027             :   mpz_init (test_result);
    2028             :   mpres_get_z (test_result, test, modulus);
    2029             :   ASSERT_ALWAYS(mpz_cmp_ui (test_result, 1UL) == 0);
    2030             :   mpz_clear (test_result);
    2031             :   mpres_clear (test, modulus);
    2032             : #endif
    2033             : 
    2034     2779203 :   return 1;
    2035             : }
    2036             : 
    2037             : void 
    2038      259287 : mpres_gcd (mpz_t R, const mpres_t S, const mpmod_t modulus)
    2039             : {
    2040             :   /* In MODMULN and REDC form, M(x) = x*R with gcd(R, modulus) = 1 .
    2041             :      Therefore gcd(M(x), modulus) = gcd(x, modulus) and we need not bother
    2042             :      to convert out of Montgomery form. */
    2043             :   ASSERT_NORMALIZED (S);
    2044      259287 :   mpz_gcd (R, S, modulus->orig_modulus);
    2045      259287 : }
    2046             : 
    2047             : #ifdef HAVE_ADDLAWS
    2048             : /* Returns non-zero if the two residues are equal, 
    2049             :    and zero if they are not */
    2050             : int 
    2051     1078060 : mpres_equal (const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    2052             : {
    2053     1078060 :   mpz_mod (modulus->temp1, S1, modulus->orig_modulus);
    2054     1078060 :   mpz_mod (modulus->temp2, S2, modulus->orig_modulus);
    2055     1078060 :   return (mpz_cmp (modulus->temp1, modulus->temp2) == 0);
    2056             : }
    2057             : #endif
    2058             : 
    2059             : #if 0 /* those routines are not called in normal operation */
    2060             : 
    2061             : void 
    2062             : mpres_out_str (FILE *fd, const unsigned int base, const mpres_t S, 
    2063             :                mpmod_t modulus)
    2064             : {
    2065             :   mpres_get_z (modulus->temp2, S, modulus);
    2066             :   mpz_out_str (fd, base, modulus->temp2);
    2067             : }
    2068             : 
    2069             : /* Multiplies S1 by the one-limb integer S2, and does modulo reduction.
    2070             :    The modulo reduction may imply multiplication of the residue class 
    2071             :    by some constant, since we may not do the correct number of REDC 
    2072             :    reduction passes and so fail to divide by the correct power of 2 for 
    2073             :    Montgomery representation. The constant is the same for each call
    2074             :    of this function with a given modulus, however. */
    2075             : static void 
    2076             : ecm_mulredc_1_basecase (mpres_t R, const mpres_t S1, const mp_limb_t S2, 
    2077             :                         mpmod_t modulus)
    2078             : {
    2079             :   mp_ptr s1p;
    2080             :   mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;
    2081             : 
    2082             :   ASSERT(ALLOC(R) >= nn);
    2083             :   ASSERT(ALLOC(S1) >= nn);
    2084             :   s1p = PTR(S1);
    2085             :   for (j = ABSIZ(S1); j < nn; j++) 
    2086             :     s1p[j] = 0;
    2087             : 
    2088             : #ifdef HAVE_NATIVE_MULREDC1_N
    2089             :   if (nn < 20)
    2090             :     {
    2091             :       mp_ptr rp = PTR(R);
    2092             :       mulredc_1(rp, S2, s1p, PTR(modulus->orig_modulus), nn,
    2093             :                 modulus->Nprim[0]);
    2094             :       MPN_NORMALIZE (rp, nn);
    2095             :       SIZ(R) = (SIZ(S1)) < 0 ? (int) -nn : (int) nn;
    2096             :     }
    2097             :   else
    2098             : #endif
    2099             :     {
    2100             :       /* FIXME, we can do much better than this */
    2101             :       mpz_mul_ui (modulus->temp1, S1, S2);
    2102             :       mpz_mod(R, modulus->temp1, modulus->orig_modulus);
    2103             :     }
    2104             : }
    2105             : 
    2106             : /* Multiplies S by n and possibly divides by some constant. 
    2107             :    Whether or not it divides depends on the modulus representation and
    2108             :    the modulus size. */
    2109             : void 
    2110             : mpres_muldivbysomething_si (mpres_t R, const mpres_t S, const long n, 
    2111             :                             mpmod_t modulus)
    2112             : {
    2113             :   ASSERT_NORMALIZED (S);
    2114             :   if (modulus->repr == ECM_MOD_MODMULN && 
    2115             :       modulus->bits / GMP_NUMB_BITS <= 20)
    2116             :     /* FIXME: is the 20 here the same constant as in mulredc1_20?
    2117             :        If so, it should be changed into a macro. */
    2118             :     {
    2119             :       MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS);
    2120             :       if (n < 0)
    2121             :         {
    2122             :           ecm_mulredc_1_basecase (R, S, (mp_limb_t) -n, modulus);
    2123             :           mpres_neg (R, R, modulus);
    2124             :         }
    2125             :       else
    2126             :         {
    2127             :           ecm_mulredc_1_basecase (R, S, (mp_limb_t) n, modulus);
    2128             :         }
    2129             :     }
    2130             :   else
    2131             :     {
    2132             :       mpz_mul_si (modulus->temp1, S, n);
    2133             :       /* This is the same for all methods: just reduce with original modulus */
    2134             :       mpz_mod (R, modulus->temp1, modulus->orig_modulus);
    2135             :     }
    2136             :   ASSERT_NORMALIZED (R);
    2137             : }
    2138             : 
    2139             : /* Returns 1 if successful, 0 if not */
    2140             : static int
    2141             : test_mpres_set_z_for_gcd_fix (const int maxk, mpmod_t modulus)
    2142             : {
    2143             :   mpres_t m, prod;
    2144             :   mpz_t n;
    2145             :   int k;
    2146             : 
    2147             :   mpres_init (prod, modulus);
    2148             :   mpres_init (m, modulus);
    2149             :   mpz_init (n);
    2150             : 
    2151             :   /* m = 1 * c, where c is a constant depending on mod reduction method */
    2152             :   mpz_set_ui (n, 1);
    2153             :   mpres_set_z_for_gcd (m, n, modulus);
    2154             : 
    2155             :   for (k = 0; k <= maxk; k++)
    2156             :     {
    2157             :       int i;
    2158             :       mpres_set_ui (prod, 1, modulus);
    2159             : 
    2160             :       /* Compute prod = 1 * (1 * c)^k */
    2161             :       for (i = 0; i < k; i++)
    2162             :         mpres_mul (prod, prod, m, modulus);
    2163             : 
    2164             :       /* Divide prod by c^k */ 
    2165             :       mpz_set_ui (n, k);
    2166             :       mpres_set_z_for_gcd_fix (prod, prod, n, modulus);
    2167             : 
    2168             :       /* Result should be 1 */
    2169             :       mpres_get_z (n, prod, modulus);
    2170             :       if (mpz_cmp_ui (n, 1) != 0) {
    2171             :         gmp_printf ("Error: test of mpres_set_z_for_gcd_fix() failed, k = %d, n = %Zd\n", k, n);
    2172             :         return 0;
    2173             :       }
    2174             :     }
    2175             : 
    2176             :   mpz_clear (n);
    2177             :   mpres_clear (m, modulus);
    2178             :   mpres_clear (prod, modulus);
    2179             :   return 1;
    2180             : }
    2181             : 
    2182             : int
    2183             : mpmod_selftest (const mpz_t n)
    2184             : {
    2185             :   mpres_t test1, test2;
    2186             :   mpmod_t modulus;
    2187             :   
    2188             :   printf ("Performing self test of modular arithmetic\n");
    2189             :   mpmod_init (modulus, n, 0);
    2190             :   mpres_init (test1, modulus);
    2191             :   mpres_init (test2, modulus);
    2192             :   mpres_set_ui (test1, 2, modulus);
    2193             :   mpres_set_ui (test2, 5, modulus);
    2194             :   mpres_muldivbysomething_si (test1, test1, 5, modulus);
    2195             :   mpres_muldivbysomething_si (test2, test2, 2, modulus);
    2196             :   if (!mpres_equal (test1, test2, modulus))
    2197             :    {
    2198             :      printf ("mpres_muldivbysomething_si() wrong\n");
    2199             :      fflush (stdout);
    2200             :      abort();
    2201             :    }
    2202             :   mpres_clear (test1, modulus);
    2203             :   mpres_clear (test2, modulus);
    2204             : 
    2205             :   if (!test_mpres_set_z_for_gcd_fix(10, modulus))
    2206             :     abort();
    2207             : 
    2208             :   mpmod_clear (modulus);
    2209             :   
    2210             :   return 0;
    2211             : }
    2212             : #endif /* #if 0 */
    2213             : 
    2214             : /****************************************************/
    2215             : /* mpresn: modular arithmetic based directly on mpn */
    2216             : /****************************************************/
    2217             : 
    2218             : /* We use here a signed word-based redundant representation.
    2219             : 
    2220             :    In case N < B^n/16 (since for redc where we add to the absolute value of
    2221             :    the residue), where n is the number of limbs of N in base B (2^32 or 2^64
    2222             :    usually), we can prove there is no adjustment (adding or subtracting N),
    2223             :    cf http://www.loria.fr/~zimmerma/papers/norm.pdf.
    2224             : 
    2225             :    However current branch predictors are quite good, thus we prefer to keep
    2226             :    the tests and to allow any input N (instead of only N < B^n/16).
    2227             : */
    2228             : 
    2229             : /* ensure R has allocated space for at least n limbs,
    2230             :    and if less than n limbs are used, pad with zeros,
    2231             :    and set SIZ(R) to n if positive or -n if negative */
    2232             : void
    2233         656 : mpresn_pad (mpres_t R, mpmod_t N)
    2234             : {
    2235         656 :   mp_size_t n = ABSIZ(N->orig_modulus);
    2236             :   mp_size_t rn;
    2237             : 
    2238         656 :   _mpz_realloc (R, n);
    2239         656 :   rn = mpz_size (R);
    2240         656 :   ASSERT_ALWAYS (rn <= n);
    2241         656 :   if (rn < n)
    2242             :     {
    2243         768 :       MPN_ZERO (PTR(R) + rn, n - rn);
    2244         256 :       SIZ(R) = SIZ(R) >= 0 ? n : -n;
    2245             :     }
    2246         656 : }
    2247             : 
    2248             : void
    2249         304 : mpresn_unpad (mpres_t R)
    2250             : {
    2251         304 :   mp_size_t n = ABSIZ(R);
    2252             : 
    2253         332 :   while (n > 0 && PTR(R)[n-1] == 0)
    2254          28 :     n--;
    2255         304 :   SIZ(R) = SIZ(R) >= 0 ? n : -n;
    2256         304 : }
    2257             : 
    2258             : /* R <- S1 * S1 mod N, used only for ECM_MOD_MODMULN */
    2259             : void 
    2260    54413472 : mpresn_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus)
    2261             : {
    2262    54413472 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2263             : 
    2264             :   ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
    2265             : 
    2266    54413472 :   ecm_sqrredc_basecase_n (PTR(R), PTR(S1), PTR(modulus->orig_modulus),
    2267             :                           n, modulus->Nprim, PTR(modulus->temp1));
    2268             : 
    2269    54413472 :   SIZ(R) = n;
    2270    54413472 : }
    2271             : 
    2272             : /* R <- S1 * S2 mod N, used only for ECM_MOD_MODMULN */
    2273             : void 
    2274    55292472 : mpresn_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    2275             : {
    2276    55292472 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2277             : 
    2278             :   ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
    2279             :   ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
    2280             : 
    2281    55292472 :   ecm_mulredc_basecase_n (PTR(R), PTR(S1), PTR(S2), PTR(modulus->orig_modulus),
    2282             :                           n, modulus->Nprim, PTR(modulus->temp1));
    2283             : 
    2284    55292472 :   SIZ(R) = SIZ(S1) == SIZ(S2) ? n : -n;
    2285    55292472 : }
    2286             : 
    2287             : /* R <- S*m/B mod modulus where m fits in a mp_limb_t.
    2288             :    Here S (w in dup_add_batch1) is the result of a subtraction,
    2289             :    thus with the notations from http://www.loria.fr/~zimmerma/papers/norm.pdf
    2290             :    we have S < 2 \alpha N.
    2291             :    Then R < (2 \alpha N \beta + \beta N) = (2 \alpha + 1) N.
    2292             :    This result R is used in an addition with u being the result of a squaring
    2293             :    thus u < \alpha N, which gives a result < (3 \alpha + 1) N.
    2294             :    Finally this result is used in a multiplication with another operand less
    2295             :    than 2 \alpha N, thus we want:
    2296             :    ((2 \alpha) (3 \alpha + 1) N^2 + \beta N)/\beta \leq \alpha N, i.e.,
    2297             :    2 \alpha (3 \alpha + 1) \varepsilon + 1 \leq \alpha
    2298             :    This implies \varepsilon \leq 7/2 - sqrt(3)/2 ~ 0.0359, in which case
    2299             :    we can take \alpha = 2/3*sqrt(3)+1 ~ 2.1547.
    2300             :    In that case no adjustment is needed in mpresn_mul_1.
    2301             :    However we prefer to keep the adjustment here, to allow a larger set of
    2302             :    inputs (\varepsilon \leq 1/16 = 0.0625 instead of 0.0359).
    2303             : */
    2304             : void 
    2305    12724368 : mpresn_mul_1 (mpres_t R, const mpres_t S, const mp_limb_t m, mpmod_t modulus)
    2306             : {
    2307    12724368 :   mp_ptr t1 = PTR(modulus->temp1);
    2308    12724368 :   mp_ptr t2 = PTR(modulus->temp2);
    2309    12724368 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2310             :   mp_limb_t q;
    2311             : 
    2312             :   ASSERT (SIZ(S) == n || -SIZ(S) == n);
    2313             :   ASSERT (ALLOC(modulus->temp1) >= n+1);
    2314             : 
    2315             : #if defined(USE_ASM_REDC) && defined(HAVE_NATIVE_MULREDC1_N)
    2316    12724368 :   if (n <= MULREDC_ASSEMBLY_MAX)
    2317    12724368 :     mulredc_1 (PTR(R), m, PTR(S), PTR(modulus->orig_modulus), n,
    2318    12724368 :                modulus->Nprim[0]);
    2319             :   else
    2320             : #endif
    2321             :     {
    2322           0 :       t1[n] = mpn_mul_1 (t1, PTR(S), n, m);
    2323           0 :       q = t1[0] * modulus->Nprim[0];
    2324           0 :       t2[n] = mpn_mul_1 (t2, PTR(modulus->orig_modulus), n, q);
    2325             : #ifdef HAVE___GMPN_ADD_NC
    2326           0 :       q = __gmpn_add_nc (PTR(R), t1 + 1, t2 + 1, n, t1[0] != 0);
    2327             : #else
    2328             :       q = mpn_add_n (PTR(R), t1 + 1, t2 + 1, n);
    2329             :       q += mpn_add_1 (PTR(R), PTR(R), n, t1[0] != 0);
    2330             : #endif
    2331           0 :       while (q != 0)
    2332           0 :         q -= mpn_sub_n (PTR(R), PTR(R), PTR(modulus->orig_modulus), n);
    2333             :     }
    2334             : 
    2335    12724368 :   SIZ(R) = SIZ(S); /* sign is unchanged */
    2336    12724368 : }
    2337             : 
    2338             : /* R <- S1 + S2 mod modulus */
    2339             : /* we assume all numbers are allocated to n limbs, and unused most significant
    2340             :    limbs are set to zero */
    2341             : void
    2342    27206736 : mpresn_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    2343             : {
    2344    27206736 :   mp_ptr r = PTR(R);
    2345    27206736 :   mp_ptr s1 = PTR(S1);
    2346    27206736 :   mp_ptr s2 = PTR(S2);
    2347    27206736 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2348             :   ATTRIBUTE_UNUSED mp_limb_t cy;
    2349             : 
    2350             :   ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
    2351             :   ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
    2352             : 
    2353    27206736 :   if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */
    2354             :     {
    2355    20402073 :       cy = mpn_add_n (r, s1, s2, n);
    2356             :       /* for N < B^n/16, the while loop will be never performed, which proves
    2357             :          it will be performed a small number of times. In practice we
    2358             :          observed up to 7 loops, but it happens rarely. */
    2359             : #ifndef MPRESN_NO_ADJUSTMENT
    2360    20741025 :       while (cy != 0)
    2361      338952 :         cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
    2362             : #endif
    2363    20402073 :       SIZ(R) = SIZ(S1);
    2364             :     }
    2365             :   else /* different signs */
    2366             :     {
    2367     6804663 :       if (mpn_cmp (s1, s2, n) >= 0)
    2368             :         {
    2369     4485656 :           mpn_sub_n (r, s1, s2, n); /* no borrow here */
    2370     4485656 :           SIZ(R) = SIZ(S1);
    2371             :         }
    2372             :       else
    2373             :         {
    2374     2319007 :           mpn_sub_n (r, s2, s1, n); /* idem */
    2375     2319007 :           SIZ(R) = SIZ(S2);
    2376             :         }
    2377             :     }
    2378    27206736 : }
    2379             : 
    2380             : void
    2381    13603368 : mpresn_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    2382             : {
    2383    13603368 :   mp_ptr r = PTR(R);
    2384    13603368 :   mp_ptr s1 = PTR(S1);
    2385    13603368 :   mp_ptr s2 = PTR(S2);
    2386    13603368 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2387             :   ATTRIBUTE_UNUSED mp_limb_t cy;
    2388             : 
    2389             :   ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
    2390             :   ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
    2391             : 
    2392    13603368 :   if (SIZ(S1) != SIZ(S2)) /* S1 and S2 are of different signs */
    2393             :     {
    2394           0 :       cy = mpn_add_n (r, s1, s2, n);
    2395             : #ifndef MPRESN_NO_ADJUSTMENT
    2396           0 :       while (cy != 0)
    2397           0 :         cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
    2398             : #endif
    2399           0 :       SIZ(R) = SIZ(S1);
    2400             :     }
    2401             :   else /* same signs, it's a real subtraction */
    2402             :     {
    2403    13603368 :       if (mpn_cmp (s1, s2, n) >= 0)
    2404             :         {
    2405     6798705 :           mpn_sub_n (r, s1, s2, n); /* no borrow here */
    2406     6798705 :           SIZ(R) = SIZ(S1);
    2407             :         }
    2408             :       else
    2409             :         {
    2410     6804663 :           mpn_sub_n (r, s2, s1, n); /* idem */
    2411     6804663 :           SIZ(R) = -SIZ(S2);
    2412             :         }
    2413             :       
    2414             :     }
    2415    13603368 : }
    2416             : 
    2417             : /* (R, T) <- (S1 + S2, S1 - S2)
    2418             :    Assume R differs from both S1 and S2.
    2419             :  */
    2420             : void
    2421    40810104 : mpresn_addsub (mpres_t R, mpres_t T,
    2422             :                const mpres_t S1, const mpres_t S2, mpmod_t modulus)
    2423             : {
    2424    40810104 :   mp_ptr r = PTR(R);
    2425    40810104 :   mp_ptr t = PTR(T);
    2426    40810104 :   mp_ptr s1 = PTR(S1);
    2427    40810104 :   mp_ptr s2 = PTR(S2);
    2428    40810104 :   mp_size_t n = ABSIZ(modulus->orig_modulus);
    2429             :   ATTRIBUTE_UNUSED mp_limb_t cy;
    2430             : 
    2431             :   ASSERT (R != S1);
    2432             :   ASSERT (R != S2);
    2433             :   ASSERT (SIZ(S1) == n || -SIZ(S1) == n);
    2434             :   ASSERT (SIZ(S2) == n || -SIZ(S2) == n);
    2435             : 
    2436    40810104 :   if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */
    2437             :     {
    2438    29520056 :       cy = mpn_add_n (r, s1, s2, n);
    2439             : #ifndef MPRESN_NO_ADJUSTMENT
    2440    30069824 :       while (cy != 0)
    2441      549768 :         cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n);
    2442             : #endif
    2443    29520056 :       SIZ(R) = SIZ(S1);
    2444    29520056 :       if (mpn_cmp (s1, s2, n) >= 0)
    2445             :         {
    2446    11486626 :           mpn_sub_n (t, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */
    2447    11486626 :           SIZ(T) = SIZ(S1);
    2448             :         }
    2449             :       else
    2450             :         {
    2451    18033430 :           mpn_sub_n (t, s2, s1, n); /* idem since {s2,n} >= {s1,n} */
    2452    18033430 :           SIZ(T) = -SIZ(S2);
    2453             :         }
    2454             :     }
    2455             :   else /* different signs */
    2456             :     {
    2457    11290048 :       if (mpn_cmp (s1, s2, n) >= 0)
    2458             :         {
    2459     5649097 :           mpn_sub_n (r, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */
    2460     5649097 :           SIZ(R) = SIZ(S1);
    2461             :         }
    2462             :       else
    2463             :         {
    2464     5640951 :           mpn_sub_n (r, s2, s1, n); /* idem since {s2,n} >= {s1,n} */
    2465     5640951 :           SIZ(R) = SIZ(S2);
    2466             :         }
    2467    11290048 :       cy = mpn_add_n (t, s1, s2, n);
    2468             : #ifndef MPRESN_NO_ADJUSTMENT
    2469    11501656 :       while (cy != 0)
    2470      211608 :         cy -= mpn_sub_n (t, t, PTR(modulus->orig_modulus), n);
    2471             : #endif
    2472    11290048 :       SIZ(T) = SIZ(S1);
    2473             :     }
    2474    40810104 : }

Generated by: LCOV version 1.14