Line data Source code
1 : /* ecm_ntt.c - high level poly functions to interface between ecm and sp
2 :
3 : Copyright 2005, 2006, 2007, 2008, 2009, 2011, 2012 Dave Newman,
4 : Paul Zimmermann, Alexander Kruppa.
5 :
6 : This file is part of the ECM Library.
7 :
8 : The ECM Library is free software; you can redistribute it and/or modify
9 : it under the terms of the GNU Lesser General Public License as published by
10 : the Free Software Foundation; either version 3 of the License, or (at your
11 : option) any later version.
12 :
13 : The ECM Library is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 : License for more details.
17 :
18 : You should have received a copy of the GNU Lesser General Public License
19 : along with the ECM Library; see the file COPYING.LIB. If not, see
20 : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 :
23 : #include <stdio.h>
24 : #include <stdlib.h>
25 : #include <string.h>
26 : #include "sp.h"
27 : #include "ecm-impl.h"
28 :
29 : #ifdef HAVE_UNISTD_H
30 : #include <unistd.h> /* for unlink */
31 : #endif
32 :
33 : #define UNUSED 0
34 :
35 : /* memory: 4 * len mpspv coeffs */
36 : void
37 21277 : ntt_mul (mpzv_t r, mpzv_t x, mpzv_t y, spv_size_t len, mpzv_t t,
38 : int monic, mpzspm_t mpzspm)
39 : {
40 : mpzspv_t u, v;
41 :
42 21277 : if (len < MUL_NTT_THRESHOLD)
43 : {
44 1366 : list_mul (r, x, len, y, len, monic, t);
45 1366 : return;
46 : }
47 :
48 19911 : u = mpzspv_init (2 * len, mpzspm);
49 19911 : v = mpzspv_init (2 * len, mpzspm);
50 :
51 19911 : mpzspv_from_mpzv (v, 0, y, len, mpzspm);
52 19911 : mpzspv_from_mpzv (u, 0, x, len, mpzspm);
53 :
54 19911 : mpzspv_mul_ntt(u, 0, u, 0, len, v, 0, len, 2 * len, monic,
55 : monic ? 2 * len : 0, mpzspm,
56 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
57 19911 : mpzspv_to_mpzv (u, 0, r, 2 * len - 1 + monic, mpzspm);
58 :
59 19911 : mpzspv_clear (u, mpzspm);
60 19911 : mpzspv_clear (v, mpzspm);
61 : }
62 :
63 : /* memory: 2 * len mpzspv coeffs */
64 : void
65 3108 : ntt_PolyFromRoots (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t,
66 : mpzspm_t mpzspm)
67 : {
68 : mpzspv_t x;
69 : spv_size_t i, m;
70 :
71 : ASSERT (len == ((spv_size_t)1) << ceil_log2 (len));
72 :
73 3108 : if (len <= MUL_NTT_THRESHOLD)
74 : {
75 2479 : PolyFromRoots (r, a, len, t, mpzspm->modulus);
76 2479 : return;
77 : }
78 :
79 629 : x = mpzspv_init (2 * len, mpzspm);
80 :
81 6251 : for (i = 0; i < len; i += MUL_NTT_THRESHOLD)
82 : {
83 5622 : PolyFromRoots (r, a + i, MUL_NTT_THRESHOLD, t, mpzspm->modulus);
84 5622 : mpzspv_from_mpzv (x, 2 * i, r, MUL_NTT_THRESHOLD, mpzspm);
85 : }
86 :
87 1973 : for (m = MUL_NTT_THRESHOLD; m < len; m *= 2)
88 : {
89 6337 : for (i = 0; i < 2 * len; i += 4 * m)
90 : {
91 4993 : mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm,
92 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
93 :
94 4993 : if (2 * m < len)
95 4364 : mpzspv_normalise (x, i, 2 * m, mpzspm);
96 : }
97 : }
98 :
99 629 : mpzspv_to_mpzv (x, 0, r, len, mpzspm);
100 :
101 629 : mpzspv_clear (x, mpzspm);
102 : }
103 :
104 :
105 : /* memory: 2 * len mpzspv coeffs */
106 : int
107 1641 : ntt_PolyFromRoots_Tree (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t,
108 : int dolvl, mpzspm_t mpzspm, mpzv_t *Tree, FILE *TreeFile)
109 : {
110 : mpzspv_t x;
111 : spv_size_t i, m, m_max;
112 : mpzv_t src;
113 1641 : mpzv_t *dst = Tree + ceil_log2 (len) - 1;
114 :
115 : ASSERT (len == ((spv_size_t)1) << ceil_log2 (len));
116 :
117 1641 : x = mpzspv_init (2 * len, mpzspm);
118 :
119 1641 : if (dolvl >= 0)
120 : {
121 835 : src = a;
122 835 : dst = &r;
123 : }
124 : else
125 : {
126 : /* Copy the roots into the destination level of the tree (negating
127 : if so desired), set the source to this level (which now contains
128 : the possibly negated roots), and advance the destination level
129 : of the tree to the next level */
130 806 : src = *dst;
131 : /* we consider x + root[i], which means we consider negated roots */
132 806 : list_set (*dst--, a, len);
133 : }
134 :
135 1641 : m = (dolvl == -1) ? 1 : 1 << (ceil_log2 (len) - 1 - dolvl);
136 1641 : m_max = (dolvl == -1) ? len : 2 * m;
137 :
138 7876 : for (; m < m_max && m < MUL_NTT_THRESHOLD; m *= 2)
139 : {
140 : /* dst = &r anyway for dolvl != -1 */
141 6235 : if (m == len / 2)
142 707 : dst = &r;
143 :
144 6235 : if (TreeFile && list_out_raw (TreeFile, src, len) == ECM_ERROR)
145 : {
146 0 : outputf (OUTPUT_ERROR, "Error writing product tree of F\n");
147 0 : return ECM_ERROR;
148 : }
149 :
150 1009444 : for (i = 0; i < len; i += 2 * m)
151 1003209 : list_mul (t + i, src + i, m, src + i + m, m, 1, t + len);
152 :
153 6235 : list_mod (*dst, t, len, mpzspm->modulus);
154 :
155 6235 : src = *dst--;
156 : }
157 :
158 2040 : for (; m < m_max; m *= 2)
159 : {
160 : ASSERT (m > 1); /* This code does not do the sign change. Let's assume
161 : MUL_NTT_THRESHOLD is always large enough that the
162 : degree 1 product are done in the above loop */
163 : /* dst = &r anyway for dolvl != -1 */
164 399 : if (m == len / 2)
165 213 : dst = &r;
166 :
167 3204 : for (i = 0; i < 2 * len; i += 4 * m)
168 : {
169 2949 : if (TreeFile &&
170 144 : list_out_raw (TreeFile, src + i / 2, 2 * m) == ECM_ERROR)
171 0 : return ECM_ERROR;
172 :
173 2805 : mpzspv_from_mpzv (x, i, src + i / 2, m, mpzspm);
174 2805 : mpzspv_from_mpzv (x, i + 2 * m, src + i / 2 + m, m, mpzspm);
175 2805 : mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm,
176 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
177 2805 : mpzspv_to_mpzv (x, i, *dst + i / 2, 2 * m, mpzspm);
178 :
179 : /* we only do the mod reduction to reduce the file size a bit */
180 2805 : if (TreeFile)
181 144 : list_mod (*dst + i / 2, *dst + i / 2, 2 * m, mpzspm->modulus);
182 : }
183 :
184 399 : src = *dst--;
185 : }
186 :
187 1641 : mpzspv_clear (x, mpzspm);
188 :
189 1641 : return 0;
190 : }
191 :
192 :
193 : /* 2 NTTs of size 2 * len
194 : * 2 NTTs of size len
195 : *
196 : * memory: 2 * len mpzspv coeffs */
197 : void
198 13085 : ntt_PrerevertDivision (mpzv_t a, mpzv_t b, mpzv_t invb, mpzspv_t sp_b,
199 : mpzspv_t sp_invb, spv_size_t len, mpzv_t t, mpzspm_t mpzspm)
200 : {
201 : mpzspv_t x;
202 :
203 13085 : if (len < PREREVERTDIVISION_NTT_THRESHOLD)
204 : {
205 673 : PrerevertDivision (a, b, invb, len, t, mpzspm->modulus);
206 673 : return;
207 : }
208 :
209 12412 : x = mpzspv_init (2 * len, mpzspm);
210 :
211 : /* y = TOP (TOP (a) * invb) */
212 12412 : mpzspv_set_sp (x, 0, 0, len + 1, mpzspm);
213 12412 : mpzspv_from_mpzv (x, len + 1, a + len, len - 1, mpzspm);
214 12412 : mpzspv_mul_ntt (x, 0, x, 0, 2 * len, sp_invb, 0, UNUSED, 2 * len, 0, 0, mpzspm,
215 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
216 12412 : mpzspv_normalise (x, 0, len, mpzspm);
217 :
218 12412 : mpzspv_mul_ntt (x, 0, x, 0, len, sp_b, 0, UNUSED, len, 0, 0, mpzspm,
219 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
220 12412 : mpzspv_to_mpzv (x, 0, t, len, mpzspm);
221 :
222 12412 : mpzspv_clear (x, mpzspm);
223 :
224 12412 : list_sub (t, t, a + len, len - 1);
225 12412 : list_sub (a, a, t, len);
226 : /* can we avoid this mod without risking overflow later? */
227 12412 : list_mod (a, a, len, mpzspm->modulus);
228 : }
229 :
230 : /* memory: 7/2 * len mpzspv coeffs */
231 929 : void ntt_PolyInvert (mpzv_t q, mpzv_t b, spv_size_t len, mpzv_t t,
232 : mpzspm_t mpzspm)
233 : {
234 929 : spv_size_t k = POLYINVERT_NTT_THRESHOLD / 2;
235 : mpzspv_t w, x, y, z;
236 :
237 929 : if (len < POLYINVERT_NTT_THRESHOLD)
238 : {
239 615 : PolyInvert (q, b, len, t, mpzspm->modulus);
240 615 : return;
241 : }
242 :
243 314 : PolyInvert (q + len - k, b + len - k, k, t, mpzspm->modulus);
244 :
245 314 : w = mpzspv_init (len / 2, mpzspm);
246 314 : x = mpzspv_init (len, mpzspm);
247 314 : y = mpzspv_init (len, mpzspm);
248 314 : z = mpzspv_init (len, mpzspm);
249 :
250 314 : mpzspv_from_mpzv (x, 0, q + len - k - 1, k + 1, mpzspm);
251 314 : mpzspv_from_mpzv (y, 0, b, len - 1, mpzspm);
252 :
253 934 : for (; k < len; k *= 2)
254 : {
255 620 : mpzspv_set (w, 0, x, 1, k, mpzspm);
256 620 : mpzspv_set (z, 0, y, len - 2 * k, 2 * k - 1, mpzspm);
257 620 : mpzspv_mul_ntt (z, 0, z, 0, 2 * k - 1, x, 0, k + 1, 2 * k, 0, 0, mpzspm,
258 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
259 620 : mpzspv_normalise (z, k, k, mpzspm);
260 620 : mpzspv_neg (z, 0, z, k, k, mpzspm);
261 :
262 620 : mpzspv_mul_ntt (x, 0, x, 0, 0, z, 0, k, 2 * k, 0, 0, mpzspm,
263 : NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
264 620 : if (2 * k < len)
265 306 : mpzspv_normalise (x, k, k, mpzspm);
266 620 : mpzspv_set (x, 1, x, k, k, mpzspm); /* legal overlap */
267 620 : mpzspv_set (x, k + 1, w, 0, MIN(k, len / 2 - 1), mpzspm);
268 : }
269 :
270 314 : mpzspv_to_mpzv (x, 1, q, len - POLYINVERT_NTT_THRESHOLD / 2, mpzspm);
271 :
272 : #if defined DEBUG
273 : ntt_mul (t, q, b, len, NULL, 0, mpzspm);
274 : list_mod (t, t, 2 * len - 1, mpzspm->modulus);
275 :
276 : spv_size_t i;
277 : for (i = len - 1; i < 2 * len - 2; i++)
278 : if (mpz_cmp_ui (t[i], 0))
279 : printf ("error in ntt_PolyInvert\n");
280 : if (mpz_cmp_ui (t[2 * len - 2], 1))
281 : printf ("error in ntt_PolyInvert-\n");
282 : #endif
283 :
284 314 : mpzspv_clear (w, mpzspm);
285 314 : mpzspv_clear (x, mpzspm);
286 314 : mpzspv_clear (y, mpzspm);
287 314 : mpzspv_clear (z, mpzspm);
288 : }
289 :
290 :
291 : /* memory: 4 * len mpzspv coeffs */
292 : int
293 916 : ntt_polyevalT (mpzv_t b, spv_size_t len, mpzv_t *Tree, mpzv_t T,
294 : mpzspv_t sp_invF, mpzspm_t mpzspm, char *TreeFilenameStem)
295 : {
296 : spv_size_t m, i;
297 916 : FILE *TreeFile = NULL;
298 : /* assume this "small" malloc will not fail in normal usage */
299 916 : char *TreeFilename = NULL;
300 916 : mpzv_t *Tree_orig = Tree;
301 916 : int level = 0; /* = ceil_log2 (len / m) - 1 */
302 916 : mpzspv_t x = mpzspv_init (2 * len, mpzspm);
303 916 : mpzspv_t y = mpzspv_init (2 * len, mpzspm);
304 :
305 916 : if (TreeFilenameStem)
306 : {
307 113 : TreeFilename = (char *) malloc (strlen (TreeFilenameStem) + 1 + 2 + 1);
308 113 : if (TreeFilename == NULL)
309 : {
310 0 : fprintf (stderr, "Cannot allocate memory in ntt_polyevalT\n");
311 0 : exit (1);
312 : }
313 : }
314 :
315 916 : mpzspv_from_mpzv (x, 0, b, len, mpzspm);
316 916 : mpzspv_mul_ntt(x, 0, x, 0, len, sp_invF, 0, UNUSED, 2 * len, 0, 0, mpzspm,
317 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
318 916 : mpzspv_normalise (x, len - 1, len, mpzspm);
319 916 : mpzspv_set (y, 0, x, len - 1, len, mpzspm); /* y = high (b * invF) */
320 916 : mpzspv_reverse (y, 0, len, mpzspm); /* y = rev (high (b * invF)) */
321 :
322 1685 : for (m = len / 2; m >= POLYEVALT_NTT_THRESHOLD; m /= 2)
323 : {
324 769 : if (TreeFilenameStem)
325 : {
326 140 : Tree = &T;
327 :
328 140 : sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level);
329 :
330 140 : TreeFile = fopen (TreeFilename, "rb");
331 140 : if (TreeFile == NULL)
332 : {
333 0 : outputf (OUTPUT_ERROR,
334 : "Error opening file %s for product tree of F\n",
335 : TreeFilename);
336 0 : mpzspv_clear (x, mpzspm);
337 0 : mpzspv_clear (y, mpzspm);
338 0 : return ECM_ERROR;
339 : }
340 :
341 140 : list_inp_raw (*Tree, TreeFile, len);
342 :
343 140 : fclose (TreeFile);
344 140 : unlink (TreeFilename);
345 : }
346 :
347 6690 : for (i = 0; i < len; i += 2 * m)
348 : {
349 :
350 5921 : list_revert (*Tree + i, m);
351 5921 : mpzspv_set_sp (x, 0, 1, 1, mpzspm);
352 5921 : mpzspv_from_mpzv (x, 1, *Tree + i, m, mpzspm);
353 : /* x contains reversed monic poly */
354 5921 : mpzspv_mul_ntt (x, 0, x, 0, m + 1, y, i, 2 * m, 2 * m, 0, 0, mpzspm,
355 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
356 5921 : if (m > POLYEVALT_NTT_THRESHOLD)
357 2766 : mpzspv_normalise (x, m, m, mpzspm);
358 :
359 5921 : list_revert (*Tree + i + m, m);
360 5921 : mpzspv_set_sp (x, 2 * m, 1, 1, mpzspm);
361 5921 : mpzspv_from_mpzv (x, 2 * m + 1, *Tree + i + m, m, mpzspm);
362 5921 : mpzspv_mul_ntt(x, 2 * m, x, 2 * m, m + 1, y, i, UNUSED, 2 * m, 0, 0, mpzspm,
363 : NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
364 5921 : if (m > POLYEVALT_NTT_THRESHOLD)
365 2766 : mpzspv_normalise (x, 3 * m, m, mpzspm);
366 :
367 5921 : mpzspv_set (y, i, x, 3 * m, m, mpzspm);
368 5921 : mpzspv_set (y, i + m, x, m, m, mpzspm);
369 : }
370 :
371 769 : Tree++;
372 769 : level++;
373 : }
374 :
375 916 : mpzspv_clear (x, mpzspm);
376 916 : mpzspv_to_mpzv (y, 0, T, len, mpzspm); /* T = rev (high (b * invF)) */
377 916 : mpzspv_clear (y, mpzspm);
378 1159402 : for (i = 0; i < len; i++)
379 1158486 : mpz_mod (T[i], T[i], mpzspm->modulus);
380 :
381 6747 : for (; m >= 1; m /= 2)
382 : {
383 5831 : if (TreeFilenameStem)
384 : {
385 684 : sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level);
386 :
387 684 : TreeFile = fopen (TreeFilename, "rb");
388 684 : if (TreeFile == NULL)
389 : {
390 0 : outputf (OUTPUT_ERROR,
391 : "Error opening file %s for product tree of F\n",
392 : TreeFilename);
393 0 : return ECM_ERROR;
394 : }
395 : }
396 :
397 5831 : TUpTree (T, Tree_orig, len, T + len, level++, 0,
398 5831 : mpzspm->modulus, TreeFile);
399 :
400 5831 : if (TreeFilenameStem)
401 : {
402 684 : fclose (TreeFile);
403 684 : unlink (TreeFilename);
404 : }
405 : }
406 :
407 916 : if (TreeFilenameStem)
408 113 : free (TreeFilename);
409 916 : list_swap (b, T, len);
410 916 : return 0;
411 : }
|