Line data Source code
1 : /* sp.h - header file for the sp library
2 :
3 : Copyright 2005, 2006, 2007, 2008, 2010, 2011, 2012 Dave Newman, Jason
4 : Papadopoulos, Paul Zimmermann, Brian Gladman, Alexander Kruppa.
5 :
6 : Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
7 : 2004, 2005, 2010 Free Software Foundation, Inc. (for parts from gmp-impl.h).
8 :
9 : This file is part of the SP library.
10 :
11 : The SP Library is free software; you can redistribute it and/or modify
12 : it under the terms of the GNU Lesser General Public License as published by
13 : the Free Software Foundation; either version 3 of the License, or (at your
14 : option) any later version.
15 :
16 : The SP Library is distributed in the hope that it will be useful, but
17 : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 : License for more details.
20 :
21 : You should have received a copy of the GNU Lesser General Public License
22 : along with the SP Library; see the file COPYING.LIB. If not, write to
23 : the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
24 : MA 02110-1301, USA.
25 : */
26 :
27 : #ifndef _SP_H
28 : #define _SP_H
29 :
30 : #include "config.h"
31 : #include <stdlib.h>
32 : #include "basicdefs.h"
33 : #include "ecm-gmp.h"
34 :
35 : #ifdef HAVE_SYS_TYPES_H
36 : #include <sys/types.h> /* needed for size_t */
37 : #endif
38 :
39 : #ifndef TUNE
40 : #include "ecm-params.h"
41 : #else
42 : extern size_t NTT_GFP_TWIDDLE_DIF_BREAKOVER;
43 : extern size_t NTT_GFP_TWIDDLE_DIT_BREAKOVER;
44 : extern size_t MUL_NTT_THRESHOLD;
45 : extern size_t PREREVERTDIVISION_NTT_THRESHOLD;
46 : extern size_t POLYINVERT_NTT_THRESHOLD;
47 : extern size_t POLYEVALT_NTT_THRESHOLD;
48 : extern size_t MPZSPV_NORMALISE_STRIDE;
49 : #endif
50 :
51 : #include <gmp.h>
52 :
53 : #if defined( __GNUC__ ) && __GNUC__ >= 3
54 : #define ATTRIBUTE_UNUSED __attribute__ ((unused))
55 : #else
56 : #define ATTRIBUTE_UNUSED
57 : #endif
58 :
59 : /**************
60 : * GMP_IMPL.H *
61 : **************/
62 :
63 : #ifdef WANT_ASSERT
64 : #include <assert.h>
65 : #define ASSERT(expr) assert (expr)
66 : #else
67 : #define ASSERT(expr) do {} while (0)
68 : #endif
69 :
70 : /* the following was inspired by longlong.h and gmp-impl.h;
71 : * note that a small prime must be the size of a GMP limb */
72 : typedef mp_limb_t UWtype;
73 : typedef unsigned int UHWtype;
74 : #if ((defined(_PA_RISC1_1) || defined(__mips)) && defined(__GNUC__))
75 : /* this seems to be needed, otherwise umul_ppmm() does not work properly */
76 : typedef mp_limb_t USItype __attribute__ ((mode (SI)));
77 : typedef mp_limb_t UDItype __attribute__ ((mode (DI)));
78 : #else
79 : typedef mp_limb_t USItype;
80 : typedef mp_limb_t UDItype;
81 : #endif
82 :
83 : #ifndef W_TYPE_SIZE
84 : #define W_TYPE_SIZE GMP_LIMB_BITS
85 : #endif
86 :
87 : #ifndef ULONG_MAX
88 : #define ULONG_MAX __GMP_ULONG_MAX
89 : #endif
90 :
91 : #define LONGLONG_STANDALONE
92 : #include "longlong.h"
93 :
94 : /* we use the remainder tree for products of 2^I0_THRESHOLD moduli or more,
95 : and the naive method for fewer moduli. We must have I0_THRESHOLD >= 1. */
96 : #define I0_THRESHOLD 7
97 :
98 : /*********
99 : * TYPES *
100 : *********/
101 :
102 : /* SP */
103 :
104 : /* the type for both a small prime, and a residue modulo a small prime.
105 : * Small primes must be 1 bit smaller than the word size for 32-bit
106 : * systems (otherwise there may not be enough suitable primes), but
107 : * may be 2+ bits smaller when the word size exceeds 32 bits (and this
108 : * simplifies modular reductions)
109 : *
110 : * For a residue x modulo a sp p, we require 0 <= x < p */
111 :
112 : #ifndef SP_NUMB_BITS
113 : #if GMP_LIMB_BITS == 64
114 : #define SP_NUMB_BITS 62
115 : #elif GMP_LIMB_BITS == 32
116 : #define SP_NUMB_BITS 31
117 : #endif
118 : #endif
119 :
120 : #if SP_NUMB_BITS < 30 || SP_NUMB_BITS > 63
121 : #error "invalid choice of small prime size"
122 : #endif
123 :
124 : #if SP_NUMB_BITS < 32
125 : typedef uint32_t sp_t;
126 : #define SP_TYPE_BITS 32
127 : #define PRISP PRIu32
128 : #else
129 : typedef uint64_t sp_t;
130 : #define SP_TYPE_BITS 64
131 : #define PRISP PRIu64
132 : #endif
133 :
134 : #define SP_MIN ((sp_t)1 << (SP_NUMB_BITS - 1))
135 : #define SP_MAX ((sp_t)(-1) >> (W_TYPE_SIZE - SP_NUMB_BITS))
136 :
137 : /* vector of residues modulo a common small prime */
138 : typedef sp_t * spv_t;
139 :
140 : /* length of a spv */
141 : typedef unsigned long spv_size_t;
142 :
143 : typedef struct
144 : {
145 : spv_t ntt_roots;
146 : spv_size_t twiddle_size;
147 : spv_t twiddle;
148 : } __sp_nttdata;
149 :
150 : typedef __sp_nttdata sp_nttdata_t[1];
151 :
152 : #define MAX_NTT_BLOCK_SIZE 128
153 :
154 : /* Which steps to perform in convolution product funtions:
155 : forward transform, pair-wise multiplication, inverse transform */
156 : #define NTT_MUL_STEP_FFT1 1
157 : #define NTT_MUL_STEP_FFT2 2
158 : #define NTT_MUL_STEP_MUL 4
159 : #define NTT_MUL_STEP_IFFT 8
160 :
161 : /* SPM */
162 :
163 : /* small prime modulus - this contains some precomputed constants to
164 : * calculate modulo a sp */
165 : typedef struct
166 : {
167 : sp_t sp; /* value of the sp */
168 : sp_t mul_c; /* constant used for reduction mod sp */
169 : sp_t invm; /* -1/sp mod 2^GMP_NUMB_BITS */
170 : sp_t Bpow; /* B^(n+1) mod sp where the input N has n limbs */
171 : sp_t prim_root;
172 : sp_t inv_prim_root;
173 : sp_nttdata_t nttdata;
174 : sp_nttdata_t inttdata;
175 : spv_t scratch;
176 : } __spm_struct;
177 :
178 : typedef __spm_struct * spm_t;
179 :
180 : /* MPZSPM */
181 :
182 : typedef mpz_t * mpzv_t;
183 :
184 : typedef struct
185 : {
186 : /* number of small primes needed to represent each coeff */
187 : unsigned int sp_num;
188 : spv_size_t max_ntt_size;
189 :
190 : mpz_t modulus;
191 :
192 : /* spm data */
193 : spm_t *spm;
194 :
195 : /* precomputed crt constants, see mpzspm.c */
196 : mpzv_t crt1, crt2;
197 : sp_t *crt3, **crt4, *crt5;
198 :
199 : /* product tree to speed up conversion from mpz to sp */
200 : mpzv_t *T; /* product tree */
201 : unsigned int d; /* ceil(log(sp_num)/log(2)) */
202 : } __mpzspm_struct;
203 :
204 : typedef __mpzspm_struct * mpzspm_t;
205 :
206 : /* MPZSPV */
207 :
208 : /* sp representation of a mpz polynomial */
209 :
210 : typedef spv_t * mpzspv_t;
211 :
212 :
213 : /*************
214 : * FUNCTIONS *
215 : *************/
216 :
217 : /* general */
218 :
219 : static inline unsigned int
220 75402 : ceil_log_2 (spv_size_t x)
221 : {
222 75402 : unsigned int a = 0;
223 :
224 75402 : x--;
225 508986 : while (x)
226 : {
227 433584 : a++;
228 433584 : x >>= 1;
229 : }
230 75402 : return a;
231 : }
232 :
233 : /* Conversion functions sp_t <-> mpz_t. Using mpz_*_ui() functions is not
234 : portable as those take unsigned long's, but on some systems
235 : (e.g. 64 bit Windows with Visual C), unsigned long has 32 bits while
236 : sp_t should use 64 */
237 :
238 : static inline void
239 3623032 : mpz_set_sp (mpz_t m, const sp_t n)
240 : {
241 : #if SP_TYPE_BITS == 32
242 : mpz_set_ui(m, (unsigned long)(uint32_t)n);
243 : #else /* 64-bit sp_t */
244 3623032 : mpz_set_uint64(m, n);
245 : #endif
246 3623032 : }
247 :
248 : static inline sp_t
249 3590826 : mpz_get_sp (const mpz_t n)
250 : {
251 : if (sizeof (sp_t) == sizeof (unsigned long))
252 : {
253 3590826 : return (sp_t) mpz_get_ui (n);
254 : }
255 : else if (sizeof (sp_t) == sizeof (mp_limb_t))
256 : {
257 : /* mpz_get_ui() returns the least significant bits of the absolute
258 : value of its argument that fit in an unsigned long.
259 : In the current GMP implementation with sign/magnitude
260 : representation, mpz_getlimbn() also returns the least sigificant
261 : bits of the absolute value. To allow for a future change to
262 : 2's-complement representation in GMP, we should explicitly
263 : use mpz_abs() to a temp var here. */
264 : return (sp_t) mpz_getlimbn (n, 0);
265 : }
266 : else
267 : {
268 : abort ();
269 : }
270 : }
271 :
272 :
273 : void * sp_aligned_malloc (size_t len);
274 : void sp_aligned_free (void *newptr);
275 :
276 : /* sp */
277 :
278 : /* Routines for arithmetic on residues modulo a small prime
279 : *
280 : * All functions return values in the range 0 <= x < p.
281 : *
282 : * The variable name of the modulus is 'p' if the input must be prime,
283 : * 'm' if we also allow composites. */
284 :
285 :
286 40450414285 : static inline sp_t sp_sub(sp_t a, sp_t b, sp_t m)
287 : {
288 : #if (defined(__GNUC__) || defined(__ICL)) && \
289 : (defined(__x86_64__) || defined(__i386__))
290 40450414285 : sp_t t = 0, tr = a;
291 :
292 40450414285 : __asm__ (
293 : "sub %2, %0 # sp_sub: tr -= b\n\t"
294 : "cmovc %3, %1 # sp_sub: if (a < b) t = m\n\t"
295 : : "+&r" (tr), "+r" (t)
296 : : "g" (b), "g" (m)
297 : : "cc"
298 : );
299 :
300 40450414285 : return tr + t;
301 : #elif defined(_MSC_VER) && !defined(_WIN64)
302 : __asm
303 : {
304 : mov eax, a
305 : xor edx, edx
306 : sub eax, b
307 : cmovb edx, m
308 : add eax, edx
309 : }
310 : #else
311 : if (a >= b)
312 : return a - b;
313 : else
314 : return a - b + m;
315 : #endif
316 : }
317 :
318 18566024983 : static inline sp_t sp_add(sp_t a, sp_t b, sp_t m)
319 : {
320 : #if (defined(__GNUC__) || defined(__ICL)) && \
321 : (defined(__x86_64__) || defined(__i386__))
322 18566024983 : sp_t t = a - m, tr = a + b;
323 :
324 18566024983 : __asm__ (
325 : "add %2, %1 # sp_add: t += b\n\t"
326 : "cmovc %1, %0 # sp_add: if (cy) tr = t \n\t"
327 : : "+r" (tr), "+&r" (t)
328 : : "g" (b)
329 : : "cc"
330 : );
331 :
332 18566024983 : return tr;
333 : #elif defined(_MSC_VER) && !defined(_WIN64)
334 : __asm
335 : {
336 : mov eax, a
337 : add eax, b
338 : mov edx, eax
339 : sub edx, m
340 : cmovnc eax, edx
341 : }
342 : #elif SP_NUMB_BITS <= W_TYPE_SIZE - 1
343 : sp_t t = a + b;
344 : if (t >= m)
345 : t -= m;
346 : return t;
347 : #else
348 : return sp_sub(a, m - b, m);
349 : #endif
350 : }
351 :
352 : /* functions used for modular reduction */
353 :
354 : #if SP_NUMB_BITS <= W_TYPE_SIZE - 2
355 :
356 : /* having a small modulus allows the reciprocal
357 : * to be one bit larger, which guarantees that the
358 : * initial remainder fits in a word and also that at
359 : * most one correction is necessary */
360 :
361 : #define sp_reciprocal(invxl,xl) \
362 : do { \
363 : ATTRIBUTE_UNUSED mp_limb_t dummy; \
364 : udiv_qrnnd (invxl, dummy, \
365 : (sp_t) 1 << (2 * SP_NUMB_BITS + 1 - \
366 : W_TYPE_SIZE), 0, xl); \
367 : } while (0)
368 :
369 21903959941 : static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di)
370 : {
371 : sp_t r;
372 : mp_limb_t q1, q2;
373 : ATTRIBUTE_UNUSED mp_limb_t tmp;
374 21903959941 : q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) |
375 21903959941 : nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE);
376 21903959941 : umul_ppmm (q2, tmp, q1, di);
377 21903959941 : r = nl - d * (q2 >> 1);
378 21903959941 : return sp_sub(r, d, d);
379 : }
380 :
381 : #else /* big modulus; no shortcuts allowed */
382 :
383 : #define sp_reciprocal(invxl,xl) \
384 : do { \
385 : ATTRIBUTE_UNUSED mp_limb_t dummy; \
386 : udiv_qrnnd (invxl, dummy, \
387 : (sp_t) 1 << (2 * SP_NUMB_BITS - \
388 : W_TYPE_SIZE), 0, xl); \
389 : } while (0)
390 :
391 : static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di)
392 : {
393 : mp_limb_t q1, q2, tmp, dqh, dql;
394 : q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) |
395 : nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE);
396 : umul_ppmm (q2, tmp, q1, di);
397 : umul_ppmm (dqh, dql, q2, d);
398 :
399 : tmp = nl;
400 : nl = tmp - dql;
401 : nh = nh - dqh - (nl > tmp);
402 : if (nh)
403 : nl -= d;
404 : nl = sp_sub(nl, d, d);
405 : return sp_sub(nl, d, d);
406 : }
407 :
408 : #endif
409 :
410 : /* x*y mod m */
411 : static inline sp_t
412 21837287184 : sp_mul (sp_t x, sp_t y, sp_t m, sp_t d)
413 : {
414 : sp_t u, v;
415 21837287184 : umul_ppmm (u, v, x, y);
416 21837287184 : return sp_udiv_rem (u, v, m, d);
417 : }
418 :
419 : /* x*y mod m */
420 : static inline sp_t
421 66672757 : sp_sqr (sp_t x, sp_t m, sp_t d)
422 : {
423 : sp_t u, v;
424 66672757 : umul_ppmm (u, v, x, x);
425 66672757 : return sp_udiv_rem (u, v, m, d);
426 : }
427 :
428 : #define sp_neg(x,m) ((x) == (sp_t) 0 ? (sp_t) 0 : (m) - (x))
429 :
430 : /* Returns x^a % m, uses a right-to-left powering ladder */
431 :
432 : static inline sp_t
433 2979380 : sp_pow (sp_t x, sp_t a, sp_t m, sp_t d)
434 : {
435 2979380 : sp_t partial = 1;
436 :
437 : while (1)
438 : {
439 63750089 : if (a & 1)
440 51468414 : partial = sp_mul (x, partial, m, d);
441 :
442 63750089 : a >>= 1;
443 :
444 63750089 : if (!a)
445 2979380 : return partial;
446 :
447 60770709 : x = sp_sqr (x, m, d);
448 : }
449 : }
450 :
451 : /* 1/x mod p where d is p->mul_c */
452 : #define sp_inv(x,p,d) sp_pow (x, (p) - 2, p, d)
453 :
454 : /* x / 2 mod m */
455 : #define sp_div_2(x,m) (((x) & 1) ? (m) - (((m) - (x)) >> 1) : ((x) >> 1))
456 :
457 : int sp_prime (sp_t);
458 :
459 : /* spm */
460 :
461 : spm_t spm_init (spv_size_t, sp_t, mp_size_t);
462 : void spm_clear (spm_t);
463 :
464 : /* spv */
465 :
466 : /* ASSIGNMENT */
467 :
468 : void spv_set (spv_t, spv_t, spv_size_t);
469 : void spv_rev (spv_t, spv_t, spv_size_t);
470 : void spv_set_sp (spv_t, sp_t, spv_size_t);
471 : void spv_set_zero (spv_t, spv_size_t);
472 :
473 : /* ARITHMETIC */
474 :
475 : /* add */
476 : void spv_add (spv_t, spv_t, spv_t, spv_size_t, sp_t);
477 :
478 : /* subtract */
479 : void spv_neg (spv_t, spv_t, spv_size_t, sp_t);
480 :
481 : /* pointwise multiplication */
482 : void spv_pwmul (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t);
483 : void spv_mul_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t, sp_t);
484 :
485 : void spv_random (spv_t, spv_size_t, sp_t);
486 :
487 : /* ntt_gfp */
488 :
489 : void spv_ntt_gfp_dif (spv_t, spv_size_t, spm_t);
490 : void spv_ntt_gfp_dit (spv_t, spv_size_t, spm_t);
491 :
492 : /* mpzspm */
493 :
494 : spv_size_t mpzspm_max_len (mpz_t);
495 : mpzspm_t mpzspm_init (spv_size_t, mpz_t);
496 : void mpzspm_clear (mpzspm_t);
497 :
498 : /* mpzspv */
499 :
500 : mpzspv_t mpzspv_init (spv_size_t, mpzspm_t);
501 : void mpzspv_clear (mpzspv_t, mpzspm_t);
502 : int mpzspv_verify (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
503 : void mpzspv_set (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
504 : mpzspm_t);
505 : void mpzspv_revcopy (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
506 : mpzspm_t);
507 : void mpzspv_set_sp (mpzspv_t, spv_size_t, sp_t, spv_size_t, mpzspm_t);
508 : void mpzspv_from_mpzv (mpzspv_t, const spv_size_t, const mpzv_t,
509 : const spv_size_t, mpzspm_t);
510 : void mpzspv_reverse (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
511 : void mpzspv_neg (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
512 : mpzspm_t);
513 : void mpzspv_add (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t,
514 : spv_size_t, spv_size_t, mpzspm_t);
515 : void mpzspv_to_mpzv (mpzspv_t, spv_size_t, mpzv_t, spv_size_t, mpzspm_t);
516 : void mpzspv_normalise (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
517 : void mpzspv_pwmul (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t,
518 : spv_size_t, spv_size_t, mpzspm_t);
519 : void mpzspv_to_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int,
520 : mpzspm_t);
521 : void mpzspv_from_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, mpzspm_t);
522 : void mpzspv_mul_ntt (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
523 : mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int, spv_size_t, mpzspm_t,
524 : int);
525 : void mpzspv_random (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
526 : void mpzspv_to_dct1 (mpzspv_t, mpzspv_t, spv_size_t, spv_size_t, mpzspv_t,
527 : mpzspm_t);
528 : void mpzspv_mul_by_dct (mpzspv_t, const mpzspv_t, spv_size_t, const mpzspm_t,
529 : int);
530 : void mpzspv_sqr_reciprocal (mpzspv_t, spv_size_t, const mpzspm_t);
531 :
532 : #endif /* _SP_H */
|