Line data Source code
1 : /* batch.c - Implement batch mode for step 1 of ECM
2 :
3 : Copyright 2011, 2012, 2016 Cyril Bouvier, Paul Zimmermann and David Cleaver.
4 :
5 : This file is part of the ECM Library.
6 :
7 : The ECM Library is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU Lesser General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or (at your
10 : option) any later version.
11 :
12 : The ECM Library is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 : License for more details.
16 :
17 : You should have received a copy of the GNU Lesser General Public License
18 : along with the ECM Library; see the file COPYING.LIB. If not, see
19 : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
20 : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
21 :
22 :
23 : /* ECM stage 1 in batch mode, for initial point (x:z) with small coordinates,
24 : such that x and z fit into a mp_limb_t.
25 : For example we can start with (x=2:y=1) with the curve by^2 = x^3 + ax^2 + x
26 : with a = 4d-2 and b=16d+2, then we have to multiply by d=(a+2)/4 in the
27 : duplicates.
28 : With the change of variable x=b*X, y=b*Y, this curve becomes:
29 : Y^2 = X^3 + a/b*X^2 + 1/b^2*X.
30 : */
31 :
32 : #include <stdlib.h>
33 : #include "ecm-impl.h"
34 : #include "getprime_r.h"
35 :
36 : #define MAX_HEIGHT 32
37 :
38 : #if ECM_UINT_MAX == 4294967295
39 : /* On a 32-bit machine, with no access to a 64-bit type,
40 : the maximum value that can be returned by mpz_sizeinbase(s,2)
41 : is = (2^32-1). Multiplying all primes up to the following
42 : will result in a product that has (2^32-1) bits. */
43 : #define MAX_B1_BATCH 2977044736UL
44 : #elif defined(_WIN32) && __GNU_MP_VERSION <= 6 && !defined(__MPIR_VERSION)
45 : /* Due to a limitation in GMP on 64-bit Windows, should also
46 : affect 32-bit Windows, sufficient memory cannot be allocated
47 : for the batch product s when using primes larger than the following */
48 : #define MAX_B1_BATCH 3124253146UL
49 : #else
50 : /* nth_prime(2^(MAX_HEIGHT-1))-1 */
51 : #define MAX_B1_BATCH 50685770166ULL
52 : #endif
53 :
54 : /* If forbiddenres != NULL, forbiddenres = "m r_1 ... r_k -1" indicating that
55 : if p = r_i mod m, then p^2 should be considered instead of p. This has
56 : only a sense for CM curves. We assume r_1 < r_2 < ... < r_k.
57 : Typical example: "4 3 -1" for curves Y^2 = X^3 + a * X.
58 : */
59 : void
60 137 : compute_s (mpz_t s, ecm_uint B1, int *forbiddenres ATTRIBUTE_UNUSED)
61 : {
62 : mpz_t acc[MAX_HEIGHT]; /* To accumulate products of prime powers */
63 : mpz_t ppz;
64 : unsigned int i, j;
65 137 : ecm_uint pi = 2, pp, maxpp, qi;
66 : prime_info_t prime_info;
67 :
68 137 : prime_info_init (prime_info);
69 :
70 137 : ASSERT_ALWAYS (B1 <= MAX_B1_BATCH);
71 :
72 4521 : for (j = 0; j < MAX_HEIGHT; j++)
73 4384 : mpz_init (acc[j]); /* sets acc[j] to 0 */
74 137 : mpz_init (ppz);
75 :
76 137 : i = 0;
77 775777 : while (pi <= B1)
78 : {
79 775640 : pp = qi = pi;
80 775640 : maxpp = B1 / qi;
81 : #ifdef HAVE_ADDLAWS
82 775640 : if (forbiddenres != NULL && pi > 2)
83 : {
84 : /* non splitting primes can occur in even powers only */
85 0 : int rp = (int)(pi % forbiddenres[0]);
86 0 : for (j = 1; forbiddenres[j] >= 0; j++)
87 0 : if (rp >= forbiddenres[j])
88 0 : break;
89 0 : if (rp == forbiddenres[j])
90 : {
91 : /* printf("p=%lu is forbidden\n", pi); */
92 0 : if (qi <= maxpp)
93 : {
94 : /* qi <= B1/qi => qi^2 <= B1, let it go */
95 0 : qi *= qi;
96 : }
97 : else
98 : {
99 : /* qi is too large, do not increment i */
100 0 : pi = getprime_mt (prime_info);
101 0 : continue;
102 : }
103 : }
104 : }
105 : #endif
106 783320 : while (pp <= maxpp)
107 7680 : pp *= qi;
108 :
109 : #if ECM_UINT_MAX == 4294967295
110 : mpz_set_ui (ppz, pp);
111 : #else
112 775640 : mpz_set_uint64 (ppz, pp);
113 : #endif
114 :
115 775640 : if ((i & 1) == 0)
116 387872 : mpz_set (acc[0], ppz);
117 : else
118 387768 : mpz_mul (acc[0], acc[0], ppz);
119 :
120 775640 : j = 0;
121 : /* We have accumulated i+1 products so far. If bits 0..j of i are all
122 : set, then i+1 is a multiple of 2^(j+1). */
123 1550488 : while ((i & (1 << j)) != 0)
124 : {
125 : /* we use acc[MAX_HEIGHT-1] as 0-sentinel below, thus we need
126 : j+1 < MAX_HEIGHT-1 */
127 : ASSERT (j + 1 < MAX_HEIGHT - 1);
128 774848 : if ((i & (1 << (j + 1))) == 0) /* i+1 is not multiple of 2^(j+2),
129 : thus add[j+1] is "empty" */
130 387768 : mpz_swap (acc[j+1], acc[j]); /* avoid a copy with mpz_set */
131 : else
132 387080 : mpz_mul (acc[j+1], acc[j+1], acc[j]); /* accumulate in acc[j+1] */
133 774848 : mpz_set_ui (acc[j], 1);
134 774848 : j++;
135 : }
136 :
137 775640 : i++;
138 775640 : pi = getprime_mt (prime_info);
139 : }
140 :
141 1417 : for (mpz_set (s, acc[0]), j = 1; mpz_cmp_ui (acc[j], 0) != 0; j++)
142 1280 : mpz_mul (s, s, acc[j]);
143 :
144 137 : prime_info_clear (prime_info); /* free the prime tables */
145 :
146 4521 : for (i = 0; i < MAX_HEIGHT; i++)
147 4384 : mpz_clear (acc[i]);
148 137 : mpz_clear (ppz);
149 137 : }
150 :
151 : #if 0
152 : /* this function is useful in debug mode to print non-normalized residues */
153 : static void
154 : mpresn_print (mpres_t x, mpmod_t n)
155 : {
156 : mp_size_t m, xn;
157 :
158 : xn = SIZ(x);
159 : m = ABSIZ(x);
160 : MPN_NORMALIZE(PTR(x), m);
161 : SIZ(x) = xn >= 0 ? m : -m;
162 : gmp_printf ("%Zd\n", x);
163 : SIZ(x) = xn;
164 : }
165 : #endif
166 :
167 : /* (x1:z1) <- 2(x1:z1)
168 : (x2:z2) <- (x1:z1) + (x2:z2)
169 : assume (x2:z2) - (x1:z1) = (2:1)
170 : Uses 4 modular multiplies and 4 modular squarings.
171 : Inputs are x1, z1, x2, z2, d, n.
172 : Use two auxiliary variables: t, w (it seems using one only is not possible
173 : if all mpresn_mul and mpresn_sqr calls don't overlap input and output).
174 :
175 : In the batch 1 mode, we pass d_prime such that the actual d is d_prime/beta.
176 : Since beta is a square, if d_prime is a square (on 64-bit machines),
177 : so is d.
178 : In mpresn_mul_1, we multiply by d_prime = beta*d and divide by beta.
179 : */
180 : static void
181 12724368 : dup_add_batch1 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
182 : mpres_t t, mpres_t w, mp_limb_t d_prime, mpmod_t n)
183 : {
184 : /* active: x1 z1 x2 z2 */
185 12724368 : mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
186 : /* active: w z1 x2 z2 */
187 12724368 : mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
188 : /* active: w z1 x1 x2 */
189 :
190 12724368 : mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
191 : /* active: w z1 x1 z2 */
192 12724368 : mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
193 : /* active: w z1 x2 z2 */
194 12724368 : mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */
195 : /* active: w t x2 z2 */
196 12724368 : mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */
197 : /* active: z1 t x2 z2 */
198 :
199 12724368 : mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
200 : /* active: x1 z1 t x2 z2 */
201 :
202 12724368 : mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */
203 : /* active: x1 w t x2 z2 */
204 :
205 12724368 : mpresn_mul_1 (z1, w, d_prime, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
206 : /* active: x1 z1 w t x2 z2 */
207 :
208 12724368 : mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
209 : /* active: x1 w t x2 z2 */
210 12724368 : mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
211 : /* active: x1 z1 x2 z2 */
212 :
213 12724368 : mpresn_addsub (w, z2, x2, z2, n);
214 : /* active: x1 z1 w z2 */
215 :
216 12724368 : mpresn_sqr (x2, w, n);
217 : /* active: x1 z1 x2 z2 */
218 12724368 : mpresn_sqr (w, z2, n);
219 : /* active: x1 z1 x2 w */
220 12724368 : mpresn_add (z2, w, w, n);
221 12724368 : }
222 :
223 : static void
224 879000 : dup_add_batch2 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
225 : mpres_t t, mpres_t w, mpres_t d, mpmod_t n)
226 : {
227 : /* active: x1 z1 x2 z2 */
228 879000 : mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
229 : /* active: w z1 x2 z2 */
230 879000 : mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
231 : /* active: w z1 x1 x2 */
232 :
233 879000 : mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
234 : /* active: w z1 x1 z2 */
235 879000 : mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
236 : /* active: w z1 x2 z2 */
237 879000 : mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */
238 : /* active: w t x2 z2 */
239 879000 : mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */
240 : /* active: z1 t x2 z2 */
241 :
242 879000 : mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
243 : /* active: x1 z1 t x2 z2 */
244 :
245 879000 : mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */
246 : /* active: x1 w t x2 z2 */
247 :
248 879000 : mpresn_mul (z1, w, d, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
249 : /* active: x1 z1 w t x2 z2 */
250 :
251 879000 : mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
252 : /* active: x1 w t x2 z2 */
253 879000 : mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
254 : /* active: x1 z1 x2 z2 */
255 :
256 879000 : mpresn_addsub (w, z2, x2, z2, n);
257 : /* active: x1 z1 w z2 */
258 :
259 879000 : mpresn_sqr (x2, w, n);
260 : /* active: x1 z1 x2 z2 */
261 879000 : mpresn_sqr (w, z2, n);
262 : /* active: x1 z1 x2 w */
263 879000 : mpresn_add (z2, w, w, n);
264 879000 : }
265 :
266 :
267 : /* Input: x is initial point
268 : A is curve parameter in Montgomery's form:
269 : g*y^2*z = x^3 + a*x^2*z + x*z^2
270 : n is the number to factor
271 : B1 is the stage 1 bound
272 : Output: If a factor is found, it is returned in x.
273 : Otherwise, x contains the x-coordinate of the point computed
274 : in stage 1 (with z coordinate normalized to 1).
275 : B1done is set to B1 if stage 1 completed normally,
276 : or to the largest prime processed if interrupted, but never
277 : to a smaller value than B1done was upon function entry.
278 : Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise
279 : ECM_NO_FACTOR_FOUND
280 : */
281 : /*
282 : For now we don't take into account go stop_asap and chkfilename
283 : */
284 : int
285 152 : ecm_stage1_batch (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
286 : double *B1done, int batch, mpz_t s)
287 : {
288 : mp_limb_t d_1;
289 : mpz_t d_2;
290 :
291 : mpres_t x1, z1, x2, z2;
292 : ecm_uint i;
293 : mpres_t t, u;
294 152 : int ret = ECM_NO_FACTOR_FOUND;
295 :
296 152 : mpres_init (x1, n);
297 152 : mpres_init (z1, n);
298 152 : mpres_init (x2, n);
299 152 : mpres_init (z2, n);
300 152 : mpres_init (t, n);
301 152 : mpres_init (u, n);
302 152 : if (batch == ECM_PARAM_BATCH_2)
303 48 : mpres_init (d_2, n);
304 :
305 : /* initialize P */
306 152 : mpres_set (x1, x, n);
307 152 : mpres_set_ui (z1, 1, n); /* P1 <- 1P */
308 :
309 : /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */
310 152 : if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
311 : {
312 104 : mpres_get_z (u, A, n);
313 104 : mpz_add_ui (u, u, 2);
314 104 : mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2);
315 104 : mpres_set_z_for_gcd (u, u, n); /* reduces u mod n */
316 104 : if (mpz_size (u) > 1)
317 : {
318 0 : mpres_get_z (u, A, n);
319 0 : outputf (OUTPUT_ERROR,
320 : "Error, 2^%d*(A+2) should fit in a mp_limb_t, A=%Zd\n",
321 : GMP_NUMB_BITS - 2, u);
322 0 : return ECM_ERROR;
323 : }
324 104 : d_1 = mpz_getlimbn (u, 0);
325 : }
326 : else
327 : {
328 : /* b = (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS)
329 : for MODMULN or REDC, B=2^GMP_NUMB_BITS for batch1,
330 : and B=1 otherwise */
331 48 : mpres_add_ui (d_2, A, 2, n);
332 48 : mpres_div_2exp (d_2, d_2, 2, n);
333 : }
334 :
335 : /* Compute 2P : no need to duplicate P, the coordinates are simple. */
336 152 : mpres_set_ui (x2, 9, n);
337 : /* here d = d_1 / GMP_NUMB_BITS */
338 152 : if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
339 : {
340 : /* warning: mpres_set_ui takes an unsigned long which has only 32 bits
341 : on Windows, while d_1 might have 64 bits */
342 104 : ASSERT_ALWAYS (mpz_size (u) == 1 && mpz_getlimbn (u, 0) == d_1);
343 104 : mpres_set_z (z2, u, n);
344 104 : mpres_div_2exp (z2, z2, GMP_NUMB_BITS, n);
345 : }
346 : else
347 48 : mpres_set (z2, d_2, n);
348 :
349 152 : mpres_mul_2exp (z2, z2, 6, n);
350 152 : mpres_add_ui (z2, z2, 8, n); /* P2 <- 2P = (9 : : 64d+8) */
351 :
352 : /* invariant: if j represents the upper bits of s,
353 : then P1 = j*P and P2=(j+1)*P */
354 :
355 152 : mpresn_pad (x1, n);
356 152 : mpresn_pad (z1, n);
357 152 : mpresn_pad (x2, n);
358 152 : mpresn_pad (z2, n);
359 :
360 : /* now perform the double-and-add ladder */
361 152 : if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
362 : {
363 12724472 : for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
364 : {
365 12724368 : if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
366 : /* P2 <- P1+P2 P1 <- 2*P1 */
367 6366560 : dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n);
368 : else /* (j,j+1) -> (2j+1,2j+2) */
369 : /* P1 <- P1+P2 P2 <- 2*P2 */
370 6357808 : dup_add_batch1 (x2, z2, x1, z1, t, u, d_1, n);
371 : }
372 : }
373 : else /* batch = ECM_PARAM_BATCH_2 */
374 : {
375 48 : mpresn_pad (d_2, n);
376 879048 : for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
377 : {
378 879000 : if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
379 : /* P2 <- P1+P2 P1 <- 2*P1 */
380 441376 : dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n);
381 : else /* (j,j+1) -> (2j+1,2j+2) */
382 : /* P1 <- P1+P2 P2 <- 2*P2 */
383 437624 : dup_add_batch2 (x2, z2, x1, z1, t, u, d_2, n);
384 : }
385 : }
386 :
387 152 : *B1done=B1;
388 :
389 152 : mpresn_unpad (x1);
390 152 : mpresn_unpad (z1);
391 :
392 152 : if (!mpres_invert (u, z1, n)) /* Factor found? */
393 : {
394 0 : mpres_gcd (f, z1, n);
395 0 : ret = ECM_FACTOR_FOUND_STEP1;
396 : }
397 152 : mpres_mul (x, x1, u, n);
398 :
399 152 : mpz_clear (x1);
400 152 : mpz_clear (z1);
401 152 : mpz_clear (x2);
402 152 : mpz_clear (z2);
403 152 : mpz_clear (t);
404 152 : mpz_clear (u);
405 152 : if (batch == ECM_PARAM_BATCH_2)
406 48 : mpz_clear (d_2);
407 :
408 152 : return ret;
409 : }
|