LCOV - unnamed - ecm/sp.h

LCOV - code coverage report

Current view:	top level - ecm - sp.h (source / functions)		Hit	Total	Coverage
Test:	unnamed	Lines:	40	40	100.0 %
Date:	2022-03-21 11:19:20	Functions:	9	9	100.0 %

          Line data    Source code

       1             : /* sp.h - header file for the sp library
       2             : 
       3             : Copyright 2005, 2006, 2007, 2008, 2010, 2011, 2012 Dave Newman, Jason
       4             : Papadopoulos, Paul Zimmermann, Brian Gladman, Alexander Kruppa.
       5             : 
       6             : Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
       7             : 2004, 2005, 2010 Free Software Foundation, Inc. (for parts from gmp-impl.h).
       8             : 
       9             : This file is part of the SP library.
      10             :   
      11             : The SP Library is free software; you can redistribute it and/or modify
      12             : it under the terms of the GNU Lesser General Public License as published by
      13             : the Free Software Foundation; either version 3 of the License, or (at your
      14             : option) any later version.
      15             : 
      16             : The SP Library is distributed in the hope that it will be useful, but
      17             : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      18             : or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      19             : License for more details.
      20             : 
      21             : You should have received a copy of the GNU Lesser General Public License
      22             : along with the SP Library; see the file COPYING.LIB.  If not, write to
      23             : the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
      24             : MA 02110-1301, USA.
      25             : */
      26             : 
      27             : #ifndef _SP_H
      28             : #define _SP_H
      29             : 
      30             : #include "config.h"
      31             : #include <stdlib.h>
      32             : #include "basicdefs.h"
      33             : #include "ecm-gmp.h"
      34             : 
      35             : #ifdef HAVE_SYS_TYPES_H
      36             : #include <sys/types.h> /* needed for size_t */
      37             : #endif
      38             : 
      39             : #ifndef TUNE
      40             : #include "ecm-params.h"
      41             : #else
      42             : extern size_t NTT_GFP_TWIDDLE_DIF_BREAKOVER;
      43             : extern size_t NTT_GFP_TWIDDLE_DIT_BREAKOVER;
      44             : extern size_t MUL_NTT_THRESHOLD;
      45             : extern size_t PREREVERTDIVISION_NTT_THRESHOLD;
      46             : extern size_t POLYINVERT_NTT_THRESHOLD;
      47             : extern size_t POLYEVALT_NTT_THRESHOLD;
      48             : extern size_t MPZSPV_NORMALISE_STRIDE;
      49             : #endif
      50             : 
      51             : #include <gmp.h>
      52             : 
      53             : #if defined( __GNUC__ ) && __GNUC__ >= 3
      54             : #define ATTRIBUTE_UNUSED __attribute__ ((unused))
      55             : #else
      56             : #define ATTRIBUTE_UNUSED
      57             : #endif
      58             : 
      59             : /**************
      60             :  * GMP_IMPL.H *
      61             :  **************/
      62             : 
      63             : #ifdef WANT_ASSERT
      64             : #include <assert.h>
      65             : #define ASSERT(expr)   assert (expr)
      66             : #else
      67             : #define ASSERT(expr)   do {} while (0)
      68             : #endif
      69             : 
      70             : /* the following was inspired by longlong.h and gmp-impl.h;
      71             :  * note that a small prime must be the size of a GMP limb */
      72             : typedef mp_limb_t UWtype;
      73             : typedef unsigned int UHWtype;
      74             : #if ((defined(_PA_RISC1_1) || defined(__mips)) && defined(__GNUC__))
      75             : /* this seems to be needed, otherwise umul_ppmm() does not work properly */
      76             : typedef mp_limb_t USItype __attribute__ ((mode (SI)));
      77             : typedef mp_limb_t UDItype __attribute__ ((mode (DI)));
      78             : #else
      79             : typedef mp_limb_t USItype;
      80             : typedef mp_limb_t UDItype;
      81             : #endif
      82             : 
      83             : #ifndef W_TYPE_SIZE
      84             : #define W_TYPE_SIZE GMP_LIMB_BITS
      85             : #endif
      86             : 
      87             : #ifndef ULONG_MAX
      88             : #define ULONG_MAX __GMP_ULONG_MAX
      89             : #endif
      90             : 
      91             : #define LONGLONG_STANDALONE
      92             : #include "longlong.h"
      93             : 
      94             : /* we use the remainder tree for products of 2^I0_THRESHOLD moduli or more,
      95             :    and the naive method for fewer moduli. We must have I0_THRESHOLD >= 1. */
      96             : #define I0_THRESHOLD 7
      97             : 
      98             : /*********
      99             :  * TYPES *
     100             :  *********/
     101             : 
     102             : /* SP */
     103             : 
     104             : /* the type for both a small prime, and a residue modulo a small prime.
     105             :  * Small primes must be 1 bit smaller than the word size for 32-bit
     106             :  * systems (otherwise there may not be enough suitable primes), but 
     107             :  * may be 2+ bits smaller when the word size exceeds 32 bits (and this
     108             :  * simplifies modular reductions)
     109             :  *
     110             :  * For a residue x modulo a sp p, we require 0 <= x < p */
     111             : 
     112             : #ifndef SP_NUMB_BITS
     113             :    #if GMP_LIMB_BITS == 64
     114             :       #define SP_NUMB_BITS 62
     115             :    #elif GMP_LIMB_BITS == 32
     116             :       #define SP_NUMB_BITS 31
     117             :    #endif
     118             : #endif
     119             : 
     120             : #if SP_NUMB_BITS < 30 || SP_NUMB_BITS > 63
     121             : #error "invalid choice of small prime size"
     122             : #endif
     123             : 
     124             : #if SP_NUMB_BITS < 32
     125             : typedef uint32_t sp_t;
     126             : #define SP_TYPE_BITS 32
     127             : #define PRISP PRIu32
     128             : #else
     129             : typedef uint64_t sp_t;
     130             : #define SP_TYPE_BITS 64
     131             : #define PRISP PRIu64
     132             : #endif
     133             : 
     134             : #define SP_MIN ((sp_t)1 << (SP_NUMB_BITS - 1))
     135             : #define SP_MAX ((sp_t)(-1) >> (W_TYPE_SIZE - SP_NUMB_BITS))
     136             : 
     137             : /* vector of residues modulo a common small prime */
     138             : typedef sp_t * spv_t;
     139             : 
     140             : /* length of a spv */
     141             : typedef unsigned long spv_size_t;
     142             : 
     143             : typedef struct
     144             : {
     145             :   spv_t ntt_roots;
     146             :   spv_size_t twiddle_size;
     147             :   spv_t twiddle;
     148             : } __sp_nttdata;
     149             : 
     150             : typedef __sp_nttdata sp_nttdata_t[1];
     151             : 
     152             : #define MAX_NTT_BLOCK_SIZE 128
     153             : 
     154             : /* Which steps to perform in convolution product funtions:
     155             :    forward transform, pair-wise multiplication, inverse transform */
     156             : #define NTT_MUL_STEP_FFT1 1
     157             : #define NTT_MUL_STEP_FFT2 2
     158             : #define NTT_MUL_STEP_MUL 4
     159             : #define NTT_MUL_STEP_IFFT 8
     160             : 
     161             : /* SPM */
     162             : 
     163             : /* small prime modulus - this contains some precomputed constants to
     164             :  * calculate modulo a sp */
     165             : typedef struct
     166             : {
     167             :   sp_t sp;              /* value of the sp */
     168             :   sp_t mul_c;           /* constant used for reduction mod sp */
     169             :   sp_t invm;            /* -1/sp mod 2^GMP_NUMB_BITS */
     170             :   sp_t Bpow;            /* B^(n+1) mod sp where the input N has n limbs */
     171             :   sp_t prim_root;
     172             :   sp_t inv_prim_root;
     173             :   sp_nttdata_t nttdata;
     174             :   sp_nttdata_t inttdata;
     175             :   spv_t scratch;
     176             : } __spm_struct;
     177             : 
     178             : typedef __spm_struct * spm_t;
     179             : 
     180             : /* MPZSPM */
     181             : 
     182             : typedef mpz_t * mpzv_t;
     183             : 
     184             : typedef struct
     185             :   {
     186             :     /* number of small primes needed to represent each coeff */
     187             :     unsigned int sp_num;
     188             :     spv_size_t max_ntt_size;
     189             :     
     190             :     mpz_t modulus;
     191             :     
     192             :     /* spm data */
     193             :     spm_t *spm;
     194             :     
     195             :     /* precomputed crt constants, see mpzspm.c */
     196             :     mpzv_t crt1, crt2;
     197             :     sp_t *crt3, **crt4, *crt5;
     198             : 
     199             :     /* product tree to speed up conversion from mpz to sp */
     200             :     mpzv_t *T;            /* product tree */
     201             :     unsigned int d;       /* ceil(log(sp_num)/log(2)) */
     202             :   } __mpzspm_struct;
     203             : 
     204             : typedef __mpzspm_struct * mpzspm_t;
     205             : 
     206             : /* MPZSPV */
     207             : 
     208             : /* sp representation of a mpz polynomial */
     209             : 
     210             : typedef spv_t * mpzspv_t;
     211             : 
     212             : 
     213             : /*************
     214             :  * FUNCTIONS *
     215             :  *************/
     216             : 
     217             : /* general */
     218             : 
     219             : static inline unsigned int
     220       75402 : ceil_log_2 (spv_size_t x)
     221             : {
     222       75402 :   unsigned int a = 0;
     223             :   
     224       75402 :   x--;
     225      508986 :   while (x)
     226             :     {
     227      433584 :       a++;
     228      433584 :       x >>= 1;
     229             :     }
     230       75402 :   return a;
     231             : }
     232             : 
     233             : /* Conversion functions sp_t <-> mpz_t. Using mpz_*_ui() functions is not
     234             :    portable as those take unsigned long's, but on some systems 
     235             :    (e.g. 64 bit Windows with Visual C), unsigned long has 32 bits while
     236             :    sp_t should use 64 */
     237             : 
     238             : static inline void 
     239     3623032 : mpz_set_sp (mpz_t m, const sp_t n)
     240             : {
     241             : #if SP_TYPE_BITS == 32
     242             :   mpz_set_ui(m, (unsigned long)(uint32_t)n);
     243             : #else /* 64-bit sp_t */
     244     3623032 :   mpz_set_uint64(m, n);
     245             : #endif
     246     3623032 : }
     247             : 
     248             : static inline sp_t 
     249     3590826 : mpz_get_sp (const mpz_t n)
     250             : {
     251             :   if (sizeof (sp_t) == sizeof (unsigned long))
     252             :     {
     253     3590826 :       return (sp_t) mpz_get_ui (n);
     254             :     }
     255             :   else if (sizeof (sp_t) == sizeof (mp_limb_t))
     256             :     {
     257             :       /* mpz_get_ui() returns the least significant bits of the absolute 
     258             :          value of its argument that fit in an unsigned long.
     259             :          In the current GMP implementation with sign/magnitude 
     260             :          representation, mpz_getlimbn() also returns the least sigificant
     261             :          bits of the absolute value. To allow for a future change to 
     262             :          2's-complement representation in GMP, we should explicitly
     263             :          use mpz_abs() to a temp var here. */
     264             :       return (sp_t) mpz_getlimbn (n, 0);
     265             :     }
     266             :   else
     267             :     {
     268             :       abort ();
     269             :     }
     270             : }
     271             : 
     272             : 
     273             : void * sp_aligned_malloc (size_t len);
     274             : void sp_aligned_free (void *newptr);
     275             : 
     276             : /* sp */
     277             : 
     278             : /* Routines for arithmetic on residues modulo a small prime
     279             :  *
     280             :  * All functions return values in the range 0 <= x < p.
     281             :  *
     282             :  * The variable name of the modulus is 'p' if the input must be prime,
     283             :  *                                     'm' if we also allow composites. */
     284             : 
     285             : 
     286 40450414285 : static inline sp_t sp_sub(sp_t a, sp_t b, sp_t m) 
     287             : {
     288             : #if (defined(__GNUC__) || defined(__ICL)) && \
     289             :     (defined(__x86_64__) || defined(__i386__))
     290 40450414285 :   sp_t t = 0, tr = a;
     291             : 
     292 40450414285 :   __asm__ (
     293             :     "sub %2, %0   # sp_sub: tr -= b\n\t"
     294             :     "cmovc %3, %1 # sp_sub: if (a < b) t = m\n\t"
     295             :     : "+&r" (tr), "+r" (t)
     296             :     : "g" (b), "g" (m)
     297             :     : "cc"
     298             :   );
     299             : 
     300 40450414285 :   return tr + t;
     301             : #elif defined(_MSC_VER) && !defined(_WIN64)
     302             :   __asm
     303             :     {
     304             :         mov     eax, a
     305             :         xor     edx, edx
     306             :         sub     eax, b
     307             :         cmovb   edx, m
     308             :         add     eax, edx
     309             :     }
     310             : #else
     311             :   if (a >= b)
     312             :     return a - b;
     313             :   else
     314             :     return a - b + m;
     315             : #endif
     316             : }
     317             : 
     318 18566024983 : static inline sp_t sp_add(sp_t a, sp_t b, sp_t m) 
     319             : {
     320             : #if (defined(__GNUC__) || defined(__ICL)) && \
     321             :     (defined(__x86_64__) || defined(__i386__))
     322 18566024983 :   sp_t t = a - m, tr = a + b;
     323             : 
     324 18566024983 :   __asm__ (
     325             :     "add %2, %1    # sp_add: t += b\n\t"
     326             :     "cmovc %1, %0  # sp_add: if (cy) tr = t \n\t"
     327             :     : "+r" (tr), "+&r" (t)
     328             :     : "g" (b)
     329             :     : "cc"
     330             :   );
     331             : 
     332 18566024983 :   return tr;
     333             : #elif defined(_MSC_VER) && !defined(_WIN64)
     334             :   __asm
     335             :     {
     336             :         mov     eax, a
     337             :         add     eax, b
     338             :         mov     edx, eax
     339             :         sub     edx, m
     340             :         cmovnc  eax, edx
     341             :     }
     342             : #elif SP_NUMB_BITS <= W_TYPE_SIZE - 1
     343             :   sp_t t = a + b;
     344             :   if (t >= m)
     345             :     t -= m;
     346             :   return t;
     347             : #else
     348             :   return sp_sub(a, m - b, m);
     349             : #endif
     350             : }
     351             : 
     352             : /* functions used for modular reduction */
     353             : 
     354             : #if SP_NUMB_BITS <= W_TYPE_SIZE - 2
     355             : 
     356             :         /* having a small modulus allows the reciprocal
     357             :          * to be one bit larger, which guarantees that the
     358             :          * initial remainder fits in a word and also that at
     359             :          * most one correction is necessary */
     360             : 
     361             : #define sp_reciprocal(invxl,xl)              \
     362             :   do {                                       \
     363             :     ATTRIBUTE_UNUSED mp_limb_t dummy;        \
     364             :     udiv_qrnnd (invxl, dummy,                \
     365             :                 (sp_t) 1 << (2 * SP_NUMB_BITS + 1 -       \
     366             :                 W_TYPE_SIZE), 0, xl);        \
     367             :   } while (0)
     368             : 
     369 21903959941 : static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di)
     370             : {
     371             :   sp_t r;
     372             :   mp_limb_t q1, q2;
     373             :   ATTRIBUTE_UNUSED mp_limb_t tmp;
     374 21903959941 :   q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) |
     375 21903959941 :             nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE);
     376 21903959941 :   umul_ppmm (q2, tmp, q1, di);
     377 21903959941 :   r = nl - d * (q2 >> 1);
     378 21903959941 :   return sp_sub(r, d, d);
     379             : }
     380             : 
     381             : #else    /* big modulus; no shortcuts allowed */
     382             : 
     383             : #define sp_reciprocal(invxl,xl)              \
     384             :   do {                                       \
     385             :     ATTRIBUTE_UNUSED mp_limb_t dummy;        \
     386             :     udiv_qrnnd (invxl, dummy,                \
     387             :                 (sp_t) 1 << (2 * SP_NUMB_BITS -   \
     388             :                 W_TYPE_SIZE), 0, xl);        \
     389             :   } while (0)
     390             : 
     391             : static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di)
     392             : {
     393             :   mp_limb_t q1, q2, tmp, dqh, dql;
     394             :   q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) |
     395             :             nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE);
     396             :   umul_ppmm (q2, tmp, q1, di);
     397             :   umul_ppmm (dqh, dql, q2, d);
     398             : 
     399             :   tmp = nl;
     400             :   nl = tmp - dql;
     401             :   nh = nh - dqh - (nl > tmp);
     402             :   if (nh)
     403             :           nl -= d;
     404             :   nl = sp_sub(nl, d, d);
     405             :   return sp_sub(nl, d, d);
     406             : }
     407             : 
     408             : #endif
     409             : 
     410             : /* x*y mod m */
     411             : static inline sp_t
     412 21837287184 : sp_mul (sp_t x, sp_t y, sp_t m, sp_t d)
     413             : {
     414             :   sp_t u, v;
     415 21837287184 :   umul_ppmm (u, v, x, y);
     416 21837287184 :   return sp_udiv_rem (u, v, m, d);
     417             : }
     418             : 
     419             : /* x*y mod m */
     420             : static inline sp_t
     421    66672757 : sp_sqr (sp_t x, sp_t m, sp_t d)
     422             : {
     423             :   sp_t u, v;
     424    66672757 :   umul_ppmm (u, v, x, x);
     425    66672757 :   return sp_udiv_rem (u, v, m, d);
     426             : }
     427             : 
     428             : #define sp_neg(x,m) ((x) == (sp_t) 0 ? (sp_t) 0 : (m) - (x))
     429             : 
     430             : /* Returns x^a % m, uses a right-to-left powering ladder */
     431             : 
     432             : static inline sp_t
     433     2979380 : sp_pow (sp_t x, sp_t a, sp_t m, sp_t d)
     434             : {
     435     2979380 :   sp_t partial = 1;
     436             : 
     437             :   while (1)
     438             :     {
     439    63750089 :       if (a & 1)
     440    51468414 :         partial = sp_mul (x, partial, m, d);
     441             : 
     442    63750089 :       a >>= 1;
     443             : 
     444    63750089 :       if (!a)
     445     2979380 :         return partial;
     446             : 
     447    60770709 :       x = sp_sqr (x, m, d);
     448             :     }
     449             : }
     450             : 
     451             : /* 1/x mod p where d is p->mul_c */
     452             : #define sp_inv(x,p,d) sp_pow (x, (p) - 2, p, d)
     453             : 
     454             : /* x / 2 mod m */
     455             : #define sp_div_2(x,m) (((x) & 1) ? (m) - (((m) - (x)) >> 1) : ((x) >> 1))
     456             :   
     457             : int sp_prime (sp_t);
     458             : 
     459             : /* spm */
     460             : 
     461             : spm_t spm_init (spv_size_t, sp_t, mp_size_t);
     462             : void spm_clear (spm_t);
     463             : 
     464             : /* spv */
     465             : 
     466             : /* ASSIGNMENT */
     467             : 
     468             : void spv_set (spv_t, spv_t, spv_size_t);
     469             : void spv_rev (spv_t, spv_t, spv_size_t);
     470             : void spv_set_sp (spv_t, sp_t, spv_size_t);
     471             : void spv_set_zero (spv_t, spv_size_t);
     472             : 
     473             : /* ARITHMETIC */
     474             : 
     475             : /* add */
     476             : void spv_add (spv_t, spv_t, spv_t, spv_size_t, sp_t);
     477             : 
     478             : /* subtract */
     479             : void spv_neg (spv_t, spv_t, spv_size_t, sp_t);
     480             : 
     481             : /* pointwise multiplication */
     482             : void spv_pwmul (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t);
     483             : void spv_mul_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t, sp_t);
     484             : 
     485             : void spv_random (spv_t, spv_size_t, sp_t);
     486             : 
     487             : /* ntt_gfp */
     488             : 
     489             : void spv_ntt_gfp_dif (spv_t, spv_size_t, spm_t);
     490             : void spv_ntt_gfp_dit (spv_t, spv_size_t, spm_t);
     491             : 
     492             : /* mpzspm */
     493             : 
     494             : spv_size_t mpzspm_max_len (mpz_t);
     495             : mpzspm_t mpzspm_init (spv_size_t, mpz_t);
     496             : void mpzspm_clear (mpzspm_t);
     497             : 
     498             : /* mpzspv */
     499             : 
     500             : mpzspv_t mpzspv_init (spv_size_t, mpzspm_t);
     501             : void mpzspv_clear (mpzspv_t, mpzspm_t);
     502             : int mpzspv_verify (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
     503             : void mpzspv_set (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
     504             :     mpzspm_t);
     505             : void mpzspv_revcopy (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
     506             :     mpzspm_t);
     507             : void mpzspv_set_sp (mpzspv_t, spv_size_t, sp_t, spv_size_t, mpzspm_t);
     508             : void mpzspv_from_mpzv (mpzspv_t, const spv_size_t, const mpzv_t, 
     509             :                        const spv_size_t, mpzspm_t);
     510             : void mpzspv_reverse (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
     511             : void mpzspv_neg (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t,
     512             :     mpzspm_t);
     513             : void mpzspv_add (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t,
     514             :     spv_size_t, spv_size_t, mpzspm_t);
     515             : void mpzspv_to_mpzv (mpzspv_t, spv_size_t, mpzv_t, spv_size_t, mpzspm_t);
     516             : void mpzspv_normalise (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
     517             : void mpzspv_pwmul (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t, 
     518             :     spv_size_t, spv_size_t, mpzspm_t);
     519             : void mpzspv_to_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int,
     520             :     mpzspm_t);
     521             : void mpzspv_from_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, mpzspm_t);
     522             : void mpzspv_mul_ntt (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, 
     523             :     mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int, spv_size_t, mpzspm_t, 
     524             :     int);
     525             : void mpzspv_random (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t);
     526             : void mpzspv_to_dct1 (mpzspv_t, mpzspv_t, spv_size_t, spv_size_t, mpzspv_t, 
     527             :     mpzspm_t);
     528             : void mpzspv_mul_by_dct (mpzspv_t, const mpzspv_t, spv_size_t, const mpzspm_t, 
     529             :     int);
     530             : void mpzspv_sqr_reciprocal (mpzspv_t, spv_size_t, const mpzspm_t);
     531             : 
     532             : #endif /* _SP_H */

Generated by: LCOV version 1.14