LCOV - code coverage report
Current view: top level - ecm - batch.c (source / functions) Hit Total Coverage
Test: unnamed Lines: 125 139 89.9 %
Date: 2022-03-21 11:19:20 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /* batch.c - Implement batch mode for step 1 of ECM
       2             :  
       3             : Copyright 2011, 2012, 2016 Cyril Bouvier, Paul Zimmermann and David Cleaver.
       4             :  
       5             : This file is part of the ECM Library.
       6             : 
       7             : The ECM Library is free software; you can redistribute it and/or modify
       8             : it under the terms of the GNU Lesser General Public License as published by
       9             : the Free Software Foundation; either version 3 of the License, or (at your
      10             : option) any later version.
      11             : 
      12             : The ECM Library is distributed in the hope that it will be useful, but
      13             : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      14             : or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      15             : License for more details.
      16             : 
      17             : You should have received a copy of the GNU Lesser General Public License
      18             : along with the ECM Library; see the file COPYING.LIB.  If not, see
      19             : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      20             : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      21             : 
      22             : 
      23             : /* ECM stage 1 in batch mode, for initial point (x:z) with small coordinates,
      24             :    such that x and z fit into a mp_limb_t.
      25             :    For example we can start with (x=2:y=1) with the curve by^2 = x^3 + ax^2 + x
      26             :    with a = 4d-2 and b=16d+2, then we have to multiply by d=(a+2)/4 in the
      27             :    duplicates.
      28             :    With the change of variable x=b*X, y=b*Y, this curve becomes:
      29             :    Y^2 = X^3 + a/b*X^2 + 1/b^2*X.
      30             : */
      31             : 
      32             : #include <stdlib.h>
      33             : #include "ecm-impl.h"
      34             : #include "getprime_r.h"
      35             : 
      36             : #define MAX_HEIGHT 32
      37             : 
      38             : #if ECM_UINT_MAX == 4294967295
      39             : /* On a 32-bit machine, with no access to a 64-bit type,
      40             :     the maximum value that can be returned by mpz_sizeinbase(s,2)
      41             :     is = (2^32-1).  Multiplying all primes up to the following
      42             :     will result in a product that has (2^32-1) bits. */
      43             : #define MAX_B1_BATCH 2977044736UL
      44             : #elif defined(_WIN32) && __GNU_MP_VERSION <= 6 && !defined(__MPIR_VERSION)
      45             : /* Due to a limitation in GMP on 64-bit Windows, should also
      46             :     affect 32-bit Windows, sufficient memory cannot be allocated
      47             :     for the batch product s when using primes larger than the following */
      48             : #define MAX_B1_BATCH 3124253146UL
      49             : #else
      50             : /* nth_prime(2^(MAX_HEIGHT-1))-1 */
      51             : #define MAX_B1_BATCH 50685770166ULL
      52             : #endif
      53             : 
      54             : /* If forbiddenres != NULL, forbiddenres = "m r_1 ... r_k -1" indicating that
      55             :    if p = r_i mod m, then p^2 should be considered instead of p. This has
      56             :    only a sense for CM curves. We assume r_1 < r_2 < ... < r_k.
      57             :    Typical example: "4 3 -1" for curves Y^2 = X^3 + a * X.
      58             : */
      59             : void
      60         137 : compute_s (mpz_t s, ecm_uint B1, int *forbiddenres ATTRIBUTE_UNUSED)
      61             : {
      62             :   mpz_t acc[MAX_HEIGHT]; /* To accumulate products of prime powers */
      63             :   mpz_t ppz;
      64             :   unsigned int i, j;
      65         137 :   ecm_uint pi = 2, pp, maxpp, qi;
      66             :   prime_info_t prime_info;
      67             : 
      68         137 :   prime_info_init (prime_info);
      69             : 
      70         137 :   ASSERT_ALWAYS (B1 <= MAX_B1_BATCH);
      71             : 
      72        4521 :   for (j = 0; j < MAX_HEIGHT; j++)
      73        4384 :     mpz_init (acc[j]); /* sets acc[j] to 0 */
      74         137 :   mpz_init (ppz);
      75             : 
      76         137 :   i = 0;
      77      775777 :   while (pi <= B1)
      78             :     {
      79      775640 :       pp = qi = pi;
      80      775640 :       maxpp = B1 / qi;
      81             : #ifdef HAVE_ADDLAWS
      82      775640 :       if (forbiddenres != NULL && pi > 2)
      83             :         {
      84             :           /* non splitting primes can occur in even powers only */
      85           0 :           int rp = (int)(pi % forbiddenres[0]);
      86           0 :           for (j = 1; forbiddenres[j] >= 0; j++)
      87           0 :             if (rp >= forbiddenres[j])
      88           0 :               break;
      89           0 :           if (rp == forbiddenres[j])
      90             :             {
      91             :               /* printf("p=%lu is forbidden\n", pi); */
      92           0 :               if (qi <= maxpp)
      93             :                 {
      94             :                   /* qi <= B1/qi => qi^2 <= B1, let it go */
      95           0 :                   qi *= qi;
      96             :                 }
      97             :               else
      98             :                 {
      99             :                   /* qi is too large, do not increment i */
     100           0 :                   pi = getprime_mt (prime_info);
     101           0 :                   continue;
     102             :                 }
     103             :             }
     104             :         }
     105             : #endif
     106      783320 :       while (pp <= maxpp)
     107        7680 :           pp *= qi;
     108             : 
     109             : #if ECM_UINT_MAX == 4294967295
     110             :       mpz_set_ui (ppz, pp);
     111             : #else
     112      775640 :       mpz_set_uint64 (ppz, pp);
     113             : #endif
     114             : 
     115      775640 :       if ((i & 1) == 0)
     116      387872 :         mpz_set (acc[0], ppz);
     117             :       else
     118      387768 :         mpz_mul (acc[0], acc[0], ppz);
     119             : 
     120      775640 :       j = 0;
     121             :       /* We have accumulated i+1 products so far. If bits 0..j of i are all
     122             :          set, then i+1 is a multiple of 2^(j+1). */
     123     1550488 :       while ((i & (1 << j)) != 0)
     124             :         {
     125             :           /* we use acc[MAX_HEIGHT-1] as 0-sentinel below, thus we need
     126             :              j+1 < MAX_HEIGHT-1 */
     127             :           ASSERT (j + 1 < MAX_HEIGHT - 1);
     128      774848 :           if ((i & (1 << (j + 1))) == 0) /* i+1 is not multiple of 2^(j+2),
     129             :                                             thus add[j+1] is "empty" */
     130      387768 :             mpz_swap (acc[j+1], acc[j]); /* avoid a copy with mpz_set */
     131             :           else
     132      387080 :             mpz_mul (acc[j+1], acc[j+1], acc[j]); /* accumulate in acc[j+1] */
     133      774848 :           mpz_set_ui (acc[j], 1);
     134      774848 :           j++;
     135             :         }
     136             : 
     137      775640 :       i++;
     138      775640 :       pi = getprime_mt (prime_info);
     139             :     }
     140             : 
     141        1417 :   for (mpz_set (s, acc[0]), j = 1; mpz_cmp_ui (acc[j], 0) != 0; j++)
     142        1280 :     mpz_mul (s, s, acc[j]);
     143             : 
     144         137 :   prime_info_clear (prime_info); /* free the prime tables */
     145             :   
     146        4521 :   for (i = 0; i < MAX_HEIGHT; i++)
     147        4384 :     mpz_clear (acc[i]);
     148         137 :   mpz_clear (ppz);
     149         137 : }
     150             : 
     151             : #if 0
     152             : /* this function is useful in debug mode to print non-normalized residues */
     153             : static void
     154             : mpresn_print (mpres_t x, mpmod_t n)
     155             : {
     156             :   mp_size_t m, xn;
     157             : 
     158             :   xn = SIZ(x);
     159             :   m = ABSIZ(x);
     160             :   MPN_NORMALIZE(PTR(x), m);
     161             :   SIZ(x) = xn >= 0 ? m : -m;
     162             :   gmp_printf ("%Zd\n", x);
     163             :   SIZ(x) = xn;
     164             : }
     165             : #endif
     166             : 
     167             : /* (x1:z1) <- 2(x1:z1)
     168             :    (x2:z2) <- (x1:z1) + (x2:z2)
     169             :    assume (x2:z2) - (x1:z1) = (2:1)
     170             :    Uses 4 modular multiplies and 4 modular squarings.
     171             :    Inputs are x1, z1, x2, z2, d, n.
     172             :    Use two auxiliary variables: t, w (it seems using one only is not possible
     173             :    if all mpresn_mul and mpresn_sqr calls don't overlap input and output).
     174             : 
     175             :    In the batch 1 mode, we pass d_prime such that the actual d is d_prime/beta.
     176             :    Since beta is a square, if d_prime is a square (on 64-bit machines),
     177             :    so is d.
     178             :    In mpresn_mul_1, we multiply by d_prime = beta*d and divide by beta.
     179             : */
     180             : static void
     181    12724368 : dup_add_batch1 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
     182             :                 mpres_t t, mpres_t w, mp_limb_t d_prime, mpmod_t n)
     183             : {
     184             :   /* active: x1 z1 x2 z2 */
     185    12724368 :   mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
     186             :   /* active: w z1 x2 z2 */
     187    12724368 :   mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
     188             :   /* active: w z1 x1 x2 */
     189             : 
     190    12724368 :   mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
     191             :   /* active: w z1 x1 z2 */
     192    12724368 :   mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
     193             :   /* active: w z1 x2 z2 */
     194    12724368 :   mpresn_sqr (t, z1, n);    /* t = (x1-z1)^2 */
     195             :   /* active: w t x2 z2 */
     196    12724368 :   mpresn_sqr (z1, w, n);    /* z1 = (x1+z1)^2 */
     197             :   /* active: z1 t x2 z2 */
     198             : 
     199    12724368 :   mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
     200             :   /* active: x1 z1 t x2 z2 */
     201             : 
     202    12724368 :   mpresn_sub (w, z1, t, n);   /* w = (x1+z1)^2 - (x1-z1)^2 */
     203             :   /* active: x1 w t x2 z2 */
     204             : 
     205    12724368 :   mpresn_mul_1 (z1, w, d_prime, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
     206             :   /* active: x1 z1 w t x2 z2 */
     207             : 
     208    12724368 :   mpresn_add (t, t, z1, n);  /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
     209             :   /* active: x1 w t x2 z2 */
     210    12724368 :   mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
     211             :   /* active: x1 z1 x2 z2 */
     212             : 
     213    12724368 :   mpresn_addsub (w, z2, x2, z2, n);
     214             :   /* active: x1 z1 w z2 */
     215             : 
     216    12724368 :   mpresn_sqr (x2, w, n);
     217             :   /* active: x1 z1 x2 z2 */
     218    12724368 :   mpresn_sqr (w, z2, n);
     219             :   /* active: x1 z1 x2 w */
     220    12724368 :   mpresn_add (z2, w, w, n);
     221    12724368 : }
     222             : 
     223             : static void
     224      879000 : dup_add_batch2 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
     225             :                 mpres_t t, mpres_t w, mpres_t d, mpmod_t n)
     226             : {
     227             :   /* active: x1 z1 x2 z2 */
     228      879000 :   mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
     229             :   /* active: w z1 x2 z2 */
     230      879000 :   mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
     231             :   /* active: w z1 x1 x2 */
     232             : 
     233      879000 :   mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
     234             :   /* active: w z1 x1 z2 */
     235      879000 :   mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
     236             :   /* active: w z1 x2 z2 */
     237      879000 :   mpresn_sqr (t, z1, n);    /* t = (x1-z1)^2 */
     238             :   /* active: w t x2 z2 */
     239      879000 :   mpresn_sqr (z1, w, n);    /* z1 = (x1+z1)^2 */
     240             :   /* active: z1 t x2 z2 */
     241             : 
     242      879000 :   mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
     243             :   /* active: x1 z1 t x2 z2 */
     244             : 
     245      879000 :   mpresn_sub (w, z1, t, n);   /* w = (x1+z1)^2 - (x1-z1)^2 */
     246             :   /* active: x1 w t x2 z2 */
     247             : 
     248      879000 :   mpresn_mul (z1, w, d, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
     249             :   /* active: x1 z1 w t x2 z2 */
     250             : 
     251      879000 :   mpresn_add (t, t, z1, n);  /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
     252             :   /* active: x1 w t x2 z2 */
     253      879000 :   mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
     254             :   /* active: x1 z1 x2 z2 */
     255             : 
     256      879000 :   mpresn_addsub (w, z2, x2, z2, n);
     257             :   /* active: x1 z1 w z2 */
     258             : 
     259      879000 :   mpresn_sqr (x2, w, n);
     260             :   /* active: x1 z1 x2 z2 */
     261      879000 :   mpresn_sqr (w, z2, n);
     262             :   /* active: x1 z1 x2 w */
     263      879000 :   mpresn_add (z2, w, w, n);
     264      879000 : }
     265             : 
     266             : 
     267             : /* Input: x is initial point
     268             :           A is curve parameter in Montgomery's form:
     269             :           g*y^2*z = x^3 + a*x^2*z + x*z^2
     270             :           n is the number to factor
     271             :           B1 is the stage 1 bound
     272             :    Output: If a factor is found, it is returned in x.
     273             :            Otherwise, x contains the x-coordinate of the point computed
     274             :            in stage 1 (with z coordinate normalized to 1).
     275             :            B1done is set to B1 if stage 1 completed normally,
     276             :            or to the largest prime processed if interrupted, but never
     277             :            to a smaller value than B1done was upon function entry.
     278             :    Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise 
     279             :            ECM_NO_FACTOR_FOUND
     280             : */
     281             : /*
     282             : For now we don't take into account go stop_asap and chkfilename
     283             : */
     284             : int
     285         152 : ecm_stage1_batch (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
     286             :                   double *B1done, int batch, mpz_t s)
     287             : {
     288             :   mp_limb_t d_1;
     289             :   mpz_t d_2;
     290             : 
     291             :   mpres_t x1, z1, x2, z2;
     292             :   ecm_uint i;
     293             :   mpres_t t, u;
     294         152 :   int ret = ECM_NO_FACTOR_FOUND;
     295             : 
     296         152 :   mpres_init (x1, n);
     297         152 :   mpres_init (z1, n);
     298         152 :   mpres_init (x2, n);
     299         152 :   mpres_init (z2, n);
     300         152 :   mpres_init (t, n);
     301         152 :   mpres_init (u, n);
     302         152 :   if (batch == ECM_PARAM_BATCH_2)
     303          48 :     mpres_init (d_2, n);
     304             : 
     305             :   /* initialize P */
     306         152 :   mpres_set (x1, x, n);
     307         152 :   mpres_set_ui (z1, 1, n); /* P1 <- 1P */
     308             : 
     309             :   /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */
     310         152 :   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
     311             :     {
     312         104 :       mpres_get_z (u, A, n);
     313         104 :       mpz_add_ui (u, u, 2);
     314         104 :       mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2);
     315         104 :       mpres_set_z_for_gcd (u, u, n); /* reduces u mod n */
     316         104 :       if (mpz_size (u) > 1)
     317             :         {
     318           0 :           mpres_get_z (u, A, n);
     319           0 :           outputf (OUTPUT_ERROR,
     320             :                "Error, 2^%d*(A+2) should fit in a mp_limb_t, A=%Zd\n",
     321             :                    GMP_NUMB_BITS - 2, u);
     322           0 :           return ECM_ERROR;
     323             :         }
     324         104 :       d_1 = mpz_getlimbn (u, 0);
     325             :     }
     326             :   else
     327             :     {
     328             :       /* b = (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS)
     329             :          for MODMULN or REDC, B=2^GMP_NUMB_BITS for batch1,
     330             :          and B=1 otherwise */
     331          48 :       mpres_add_ui (d_2, A, 2, n);
     332          48 :       mpres_div_2exp (d_2, d_2, 2, n); 
     333             :     }
     334             : 
     335             :   /* Compute 2P : no need to duplicate P, the coordinates are simple. */
     336         152 :   mpres_set_ui (x2, 9, n);
     337             :   /* here d = d_1 / GMP_NUMB_BITS */
     338         152 :   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
     339             :     {
     340             :       /* warning: mpres_set_ui takes an unsigned long which has only 32 bits
     341             :          on Windows, while d_1 might have 64 bits */
     342         104 :       ASSERT_ALWAYS (mpz_size (u) == 1 && mpz_getlimbn (u, 0) == d_1);
     343         104 :       mpres_set_z (z2, u, n);
     344         104 :       mpres_div_2exp (z2, z2, GMP_NUMB_BITS, n);
     345             :     }
     346             :   else
     347          48 :       mpres_set (z2, d_2, n);
     348             :  
     349         152 :   mpres_mul_2exp (z2, z2, 6, n);
     350         152 :   mpres_add_ui (z2, z2, 8, n); /* P2 <- 2P = (9 : : 64d+8) */
     351             : 
     352             :   /* invariant: if j represents the upper bits of s,
     353             :      then P1 = j*P and P2=(j+1)*P */
     354             : 
     355         152 :   mpresn_pad (x1, n);
     356         152 :   mpresn_pad (z1, n);
     357         152 :   mpresn_pad (x2, n);
     358         152 :   mpresn_pad (z2, n);
     359             : 
     360             :   /* now perform the double-and-add ladder */
     361         152 :   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
     362             :     {
     363    12724472 :       for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
     364             :         {
     365    12724368 :           if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
     366             :             /* P2 <- P1+P2    P1 <- 2*P1 */
     367     6366560 :             dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n);
     368             :           else /* (j,j+1) -> (2j+1,2j+2) */
     369             :               /* P1 <- P1+P2     P2 <- 2*P2 */
     370     6357808 :             dup_add_batch1 (x2, z2, x1, z1, t, u, d_1, n);
     371             :         }
     372             :     }
     373             :   else /* batch = ECM_PARAM_BATCH_2 */
     374             :     {
     375          48 :       mpresn_pad (d_2, n);
     376      879048 :       for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
     377             :         {
     378      879000 :           if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
     379             :             /* P2 <- P1+P2    P1 <- 2*P1 */
     380      441376 :             dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n);
     381             :           else /* (j,j+1) -> (2j+1,2j+2) */
     382             :               /* P1 <- P1+P2     P2 <- 2*P2 */
     383      437624 :             dup_add_batch2 (x2, z2, x1, z1, t, u, d_2, n);
     384             :         }
     385             :     }
     386             : 
     387         152 :   *B1done=B1;
     388             : 
     389         152 :   mpresn_unpad (x1);
     390         152 :   mpresn_unpad (z1);
     391             : 
     392         152 :   if (!mpres_invert (u, z1, n)) /* Factor found? */
     393             :     {
     394           0 :       mpres_gcd (f, z1, n);
     395           0 :       ret = ECM_FACTOR_FOUND_STEP1;
     396             :     }
     397         152 :   mpres_mul (x, x1, u, n);
     398             : 
     399         152 :   mpz_clear (x1);
     400         152 :   mpz_clear (z1);
     401         152 :   mpz_clear (x2);
     402         152 :   mpz_clear (z2);
     403         152 :   mpz_clear (t);
     404         152 :   mpz_clear (u);
     405         152 :   if (batch == ECM_PARAM_BATCH_2)
     406          48 :       mpz_clear (d_2);
     407             : 
     408         152 :   return ret;
     409             : }

Generated by: LCOV version 1.14