LCOV - code coverage report
Current view: top level - ecm - tune.c (source / functions) Hit Total Coverage
Test: unnamed Lines: 230 240 95.8 %
Date: 2022-03-21 11:19:20 Functions: 22 22 100.0 %

          Line data    Source code
       1             : /* Tune program for GMP-ECM.
       2             : 
       3             : Copyright 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Paul Zimmermann,
       4             : Alexander Kruppa, Dave Newman and Jason Papadopoulos.
       5             : 
       6             : This program is free software; you can redistribute it and/or modify
       7             : it under the terms of the GNU General Public License as published by
       8             : the Free Software Foundation; either version 3 of the License, or (at your
       9             : option) any later version.
      10             : 
      11             : This program is distributed in the hope that it will be useful, but
      12             : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      13             : or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
      14             : more details.
      15             : 
      16             : You should have received a copy of the GNU General Public License
      17             : along with this program; see the file COPYING.  If not, see
      18             : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      19             : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      20             : 
      21             : #include <stdio.h>
      22             : #include <stdlib.h>
      23             : #include "ecm-gmp.h"
      24             : #include "ecm-impl.h"
      25             : 
      26             : /* 250ms, we (probably) don't need any more precision */
      27             : #define GRANULARITY 250
      28             : #define MAX_LOG2_LEN 18 /* 2 * 131072 */
      29             : #define MAX_LEN (1U << max_log2_len)
      30             : #define MAX_LOG2_MPZSPV_NORMALISE_STRIDE (MIN (12, max_log2_len))
      31             : /* we currently optimize GMP-ECM for a 200-digit number */
      32             : #define M_str "29799904256775982671863388319999573561548825027149399972531599612392671227006866151136667908641695103422986028076864929902803267437351318167549013218980573566942647077444419419003164546362008247462049"
      33             : 
      34             : #define ELAPSED elltime (__st, cputime () )
      35             : #define TUNE_FUNC_START(x)                   \
      36             : double x (size_t n)                          \
      37             :   { unsigned int __i, __k = 1; long __st;
      38             : 
      39             : /* Keep doubling the number of iterations until the timing is 
      40             :    at least GRANULARITY */
      41             : #define TUNE_FUNC_LOOP(x)                    \
      42             :   do {                                       \
      43             :     do {                                     \
      44             :       __st = cputime ();                     \
      45             :       for (__i = 0; __i < __k; __i++) { x; } \
      46             :       __k *= 2;                              \
      47             :     } while (ELAPSED < GRANULARITY);         \
      48             :     __k /= 2;                                \
      49             :     __st = ELAPSED;                          \
      50             :   } while (0)
      51             : 
      52             : #define TUNE_FUNC_END(x)                     \
      53             :   if (tune_verbose)                          \
      54             :     fprintf (stderr, #x "(%2ld) = %f\n", (long)n, (double) __k / (double) __st); \
      55             :   return (double) __k / (double) __st; }
      56             : 
      57             : 
      58             : /* Throughout, each function pointer points to a function
      59             :  * 
      60             :  *   double f0 (size_t n);
      61             :  *
      62             :  * that runs for at least GRANULARITY ms and then returns the number of
      63             :  * iterations performed per ms.
      64             :  *
      65             :  * X_Y_THRESHOLD denotes the threshold at which to start using Y for X. */
      66             : 
      67             : 
      68             : mpz_t M; /* yes, global variables */
      69             : gmp_randstate_t gmp_randstate;
      70             : size_t mp_size;
      71             : mpzspm_t mpzspm;
      72             : mpzv_t x, y, z, t;
      73             : spm_t spm;
      74             : spv_t spv;
      75             : mpzspv_t mpzspv;
      76             : int tune_verbose;
      77             : int max_log2_len = MAX_LOG2_LEN;
      78             : int min_log2_len = 3;
      79             : 
      80             : size_t MPZMOD_THRESHOLD;
      81             : size_t REDC_THRESHOLD;
      82             : size_t NTT_GFP_TWIDDLE_DIF_BREAKOVER = MAX_LOG2_LEN;
      83             : size_t NTT_GFP_TWIDDLE_DIT_BREAKOVER = MAX_LOG2_LEN;
      84             : size_t MUL_NTT_THRESHOLD;
      85             : size_t PREREVERTDIVISION_NTT_THRESHOLD;
      86             : size_t POLYINVERT_NTT_THRESHOLD;
      87             : size_t POLYEVALT_NTT_THRESHOLD;
      88             : size_t MPZSPV_NORMALISE_STRIDE = 256;
      89             : 
      90             : void
      91      786432 : mpz_quick_random (mpz_t x, mpz_t M)
      92             : {
      93      786432 :   mpz_urandomm (x, gmp_randstate, M);
      94      786432 : }
      95             : 
      96             : 
      97             : double
      98         162 : tune_mpres_mul (mp_size_t limbs, int repr)
      99             : {
     100             :   mpmod_t modulus;
     101             :   mpres_t x, y, z;
     102             :   mpz_t N, p, q;
     103         162 :   unsigned int __k = 1, __i;
     104             :   long __st;
     105             : 
     106         162 :   mpz_init (N);
     107         162 :   mpz_init (p);
     108         162 :   mpz_init (q);
     109             :   
     110             :   /* No need to generate a probable prime, just ensure N is not
     111             :      divisible by 2 or 3 */
     112             :   do
     113             :     {
     114         162 :       mpz_urandomb (N, gmp_randstate, limbs * GMP_NUMB_BITS);
     115         341 :       while (mpz_gcd_ui (NULL, N, 6) != 1)
     116         179 :         mpz_add_ui (N, N, 1);
     117             :     }
     118         162 :   while ((mp_size_t) mpz_size (N) != limbs);
     119             :   
     120         162 :   if (repr == ECM_MOD_MPZ)
     121          81 :     mpmod_init_MPZ (modulus, N);
     122          81 :   else if (repr == ECM_MOD_MODMULN)
     123          27 :     mpmod_init_MODMULN (modulus, N);
     124          54 :   else if (repr == ECM_MOD_REDC)
     125          54 :     mpmod_init_REDC (modulus, N);
     126             : 
     127         162 :   mpz_urandomm (p, gmp_randstate, N);
     128         162 :   mpz_urandomm (q, gmp_randstate, N);
     129             :   
     130         162 :   mpres_init (x, modulus);
     131         162 :   mpres_init (y, modulus);
     132         162 :   mpres_init (z, modulus);
     133             : 
     134         162 :   mpres_set_z (x, p, modulus);
     135         162 :   mpres_set_z (y, q, modulus);
     136             : 
     137    22014215 :   TUNE_FUNC_LOOP (mpres_mul (z, x, y, modulus));
     138             : 
     139         162 :   mpres_clear (x, modulus);
     140         162 :   mpres_clear (y, modulus);
     141         162 :   mpres_clear (z, modulus);
     142         162 :   mpmod_clear (modulus);
     143         162 :   mpz_clear (N);
     144         162 :   mpz_clear (p);
     145         162 :   mpz_clear (q);
     146             : 
     147         162 :   return (double) __k / (double) __st;
     148             : }
     149             : 
     150             : #if 0
     151             : double
     152             : tune_mpres_sqr (mp_size_t limbs, int repr)
     153             : {
     154             :   mpmod_t modulus;
     155             :   mpres_t x, z;
     156             :   mpz_t N, p;
     157             :   unsigned int __k = 1, __i;
     158             :   long __st;
     159             : 
     160             :   mpz_init (N);
     161             :   mpz_init (p);
     162             :   
     163             :   /* No need to generate a probable prime, just ensure N is not
     164             :      divisible by 2 or 3 */
     165             :   do
     166             :     {
     167             :       mpz_urandomb (N, gmp_randstate, limbs * GMP_NUMB_BITS);
     168             :       while (mpz_gcd_ui (NULL, N, 6) != 1)
     169             :         mpz_add_ui (N, N, 1);
     170             :     }
     171             :   while ((mp_size_t) mpz_size (N) != limbs);
     172             :   
     173             :   if (repr == ECM_MOD_MPZ)
     174             :     mpmod_init_MPZ (modulus, N);
     175             :   else if (repr == ECM_MOD_MODMULN)
     176             :     mpmod_init_MODMULN (modulus, N);
     177             :   else if (repr == ECM_MOD_REDC)
     178             :     mpmod_init_REDC (modulus, N);
     179             : 
     180             :   mpz_urandomm (p, gmp_randstate, N);
     181             :   
     182             :   mpres_init (x, modulus);
     183             :   mpres_init (z, modulus);
     184             : 
     185             :   mpres_set_z (x, p, modulus);
     186             : 
     187             :   TUNE_FUNC_LOOP (mpres_sqr (z, x, modulus));
     188             : 
     189             :   mpres_clear (x, modulus);
     190             :   mpres_clear (z, modulus);
     191             :   mpmod_clear (modulus);
     192             :   mpz_clear (N);
     193             :   mpz_clear (p);
     194             : 
     195             :   return (double) __k / (double) __st;
     196             : }
     197             : #endif
     198             : 
     199             : double
     200          81 : tune_mpres_mul_mpz (size_t n)
     201             : {
     202          81 :   return tune_mpres_mul (n, ECM_MOD_MPZ);
     203             : }
     204             : 
     205             : double
     206          27 : tune_mpres_mul_modmuln (size_t n)
     207             : {
     208          27 :   return tune_mpres_mul (n, ECM_MOD_MODMULN);
     209             : }
     210             : 
     211             : double
     212          54 : tune_mpres_mul_redc (size_t n)
     213             : {
     214          54 :   return tune_mpres_mul (n, ECM_MOD_REDC);
     215             : }
     216             : 
     217          15 : TUNE_FUNC_START (tune_spv_ntt_gfp_dif)
     218          15 :   NTT_GFP_TWIDDLE_DIF_BREAKOVER = n;
     219         150 :   TUNE_FUNC_LOOP (spv_ntt_gfp_dif (spv, max_log2_len, spm));
     220          15 : TUNE_FUNC_END (tune_spv_ntt_gfp_dif)
     221             : 
     222             : 
     223          15 : TUNE_FUNC_START (tune_spv_ntt_gfp_dit)
     224          15 :   NTT_GFP_TWIDDLE_DIT_BREAKOVER = n;
     225         150 :   TUNE_FUNC_LOOP (spv_ntt_gfp_dit (spv, max_log2_len, spm));
     226          15 : TUNE_FUNC_END (tune_spv_ntt_gfp_dit_recursive)
     227             : 
     228             : 
     229           9 : TUNE_FUNC_START (tune_ntt_mul)
     230           9 :   MUL_NTT_THRESHOLD = 0;
     231             : 
     232       19139 :   TUNE_FUNC_LOOP (ntt_mul (z, x, y, 1 << n, NULL, 1, mpzspm));
     233           9 : TUNE_FUNC_END (tune_ntt_mul)
     234             : 
     235             : 
     236           9 : TUNE_FUNC_START (tune_list_mul)
     237      283282 :   TUNE_FUNC_LOOP (list_mul (z, x, 1 << n, y, 1 << n, 1, t));
     238           9 : TUNE_FUNC_END (tune_list_mul)
     239             : 
     240             : 
     241           9 : TUNE_FUNC_START (tune_ntt_PrerevertDivision)
     242           9 :   PREREVERTDIVISION_NTT_THRESHOLD = 0;
     243             : 
     244       10946 :   TUNE_FUNC_LOOP (ntt_PrerevertDivision (z, x, y, mpzspv, mpzspv,
     245             :     1 << n, t, mpzspm));
     246           9 : TUNE_FUNC_END (tune_ntt_PrerevertDivision)
     247             : 
     248             : 
     249           9 : TUNE_FUNC_START (tune_PrerevertDivision)
     250       75136 :   TUNE_FUNC_LOOP (PrerevertDivision (z, x, y, 1 << n, t, mpzspm->modulus));
     251           9 : TUNE_FUNC_END (tune_PrerevertDivision)
     252             : 
     253             : 
     254           3 : TUNE_FUNC_START (tune_ntt_PolyInvert)
     255           3 :   POLYINVERT_NTT_THRESHOLD = 1 << n;
     256             :   
     257          14 :   TUNE_FUNC_LOOP (ntt_PolyInvert (z, x, 1 << n, t, mpzspm));
     258           3 : TUNE_FUNC_END (tune_ntt_PolyInvert)
     259             : 
     260             : 
     261           3 : TUNE_FUNC_START (tune_PolyInvert)
     262             :   
     263          40 :   TUNE_FUNC_LOOP (PolyInvert (z, x, 1 << n, t, mpzspm->modulus));
     264           3 : TUNE_FUNC_END (tune_PolyInvert)
     265             :   
     266             : 
     267           3 : TUNE_FUNC_START (tune_ntt_polyevalT)
     268             :   unsigned int i;
     269           3 :   mpzv_t *Tree = (mpzv_t *) malloc ((n + 1) * sizeof (mpzv_t));
     270           3 :   ASSERT_ALWAYS (Tree != NULL);
     271             :   
     272          49 :   for (i = 0; i <= n; i++)
     273          46 :     Tree[i] = x;
     274             : 
     275           3 :   POLYEVALT_NTT_THRESHOLD = 1 << n;
     276             : 
     277           9 :   TUNE_FUNC_LOOP (ntt_polyevalT (z, 1 << n, Tree, t, mpzspv, mpzspm, NULL));
     278             : 
     279           3 :   free (Tree);
     280           3 : TUNE_FUNC_END (tune_ntt_polyevalT) 
     281             : 
     282             : 
     283           3 : TUNE_FUNC_START (tune_polyevalT)
     284             :   unsigned int i;
     285           3 :   mpzv_t *Tree = (mpzv_t *) malloc ((n + 1) * sizeof (mpzv_t));
     286           3 :   ASSERT_ALWAYS (Tree != NULL);
     287             : 
     288          49 :   for (i = 0; i <= n; i++)
     289          46 :     Tree[i] = x;
     290             : 
     291           9 :   TUNE_FUNC_LOOP (polyeval_tellegen (z, 1 << n, Tree, t, 3 * (1 << n),
     292             :           x, mpzspm->modulus, NULL));
     293             : 
     294           3 :   free (Tree);
     295           3 : TUNE_FUNC_END (tune_polyevalT)
     296             : 
     297             : 
     298          11 : TUNE_FUNC_START (tune_mpzspv_normalise)
     299          11 :   MPZSPV_NORMALISE_STRIDE = 1 << n;
     300             :   
     301         396 :   TUNE_FUNC_LOOP (mpzspv_normalise (mpzspv, 0,
     302             :     1 << MAX_LOG2_MPZSPV_NORMALISE_STRIDE, mpzspm));
     303          11 : TUNE_FUNC_END (tune_mpzspv_normalise)
     304             : 
     305             : 
     306         495 : TUNE_FUNC_START (tune_ecm_mul_lo_n)
     307             :   mp_limb_t rp[2 * MPN_MUL_LO_THRESHOLD];
     308             :   mp_limb_t xp[MPN_MUL_LO_THRESHOLD];
     309             :   mp_limb_t yp[MPN_MUL_LO_THRESHOLD];
     310             : 
     311         495 :   if (n > 1 && n < (mp_size + 1) / 2)
     312         196 :     return 0.0;
     313             :   
     314         299 :   mpn_random (xp, mp_size);
     315         299 :   mpn_random (yp, mp_size);
     316             :   
     317         299 :   mpn_mul_lo_threshold[mp_size] = n;
     318             : 
     319  1189091237 :   TUNE_FUNC_LOOP (ecm_mul_lo_n (rp, xp, yp, mp_size));
     320         299 : TUNE_FUNC_END (tune_ecm_mul_lo_n)
     321             : 
     322             : /* Return the lowest n with min_n <= n < max_n such that
     323             :  * f1(t) >= f0(t) for all t in [n, n + k), or return max_n if no such
     324             :  * n exists. This function will typically return high values if there
     325             :  * is no 'clean' threshold between f0(n) and f1(n). */
     326             : size_t
     327           4 : crossover2 (double (*f0)(size_t), double (*f1)(size_t),
     328             :     size_t min_n, size_t max_n, size_t k)
     329             : {
     330           4 :   size_t n = min_n;
     331             :   size_t t;
     332             :   
     333          75 :   while (n < max_n)
     334             :     {
     335         100 :       for (t = MIN (max_n, n + k); t > n; t--)
     336             :         {
     337          99 :           if ((f0)(t - 1) > (f1)(t - 1))
     338          71 :             break;
     339             :         }
     340             : 
     341          72 :       if (t == n)
     342           1 :         return n;
     343             : 
     344          71 :       n = t;
     345             :     };
     346             : 
     347           3 :   return max_n;
     348             : }
     349             : 
     350             : 
     351             : /* Assume f0 and f1 are monotone decreasing. Return the first n in the range
     352             :  * [min_n, max_n) for which f1(n) >= f0(n), or return max_n if no such n
     353             :  * exists. We use a bisection algorithm so the function is fast but
     354             :  * may give slightly varied results. */
     355             : size_t
     356           8 : crossover (double (*f0)(size_t), double (*f1)(size_t),
     357             :     size_t min_n, size_t max_n)
     358             : {
     359             :   size_t mid_n;
     360             :   
     361             : #ifdef TUNE_SLOW
     362             :   return crossover2 (f0, f1, min_n, max_n, 1);
     363             : #endif
     364             :     
     365           8 :   if (min_n == max_n)
     366           2 :     return min_n;
     367             : 
     368           6 :   mid_n = (max_n + min_n) / 2;
     369           6 :   return ((f0)(mid_n) > (f1)(mid_n))
     370           6 :     ? crossover (f0, f1, mid_n + 1, max_n)
     371          12 :     : crossover (f0, f1, min_n, mid_n);
     372             : }
     373             : 
     374             : 
     375             : /* Return the n in the range [min_n, max_n) that maximises f(n).
     376             :  * We make no assumptions about the shape of f(n) and so evaluate
     377             :  * f at every point. */
     378             : size_t
     379          33 : maximise (double (*f)(size_t), size_t min_n, size_t max_n)
     380             : {
     381          33 :   size_t n, best_n = 0;
     382          33 :   double f_n, f_best_n = -1.0;
     383             : 
     384         569 :   for (n = min_n; n < max_n; n++)
     385             :     {
     386         536 :       f_n = f (n);
     387         536 :       if (f_n > f_best_n)
     388             :         {
     389         146 :           f_best_n = f_n;
     390         146 :           best_n = n;
     391             :         }
     392             :     }
     393             : 
     394          33 :   return best_n;
     395             : }
     396             : 
     397             : #if 0
     398             : /* Debugging. Print the value of f0(n) and f1(n) and which is fastest. */
     399             : void
     400             : print_timings (double (*f0)(size_t), double (*f1)(size_t),
     401             :   size_t min_n, size_t max_n)
     402             : {
     403             :   size_t n;
     404             :   double f0_n, f1_n;
     405             :   
     406             :   for (n = min_n; n < max_n; n++)
     407             :     {
     408             :       f0_n = (f0)(n);
     409             :       f1_n = (f1)(n);
     410             :       printf ("n=%2ld: %8.2f %8.2f (f%d)\n",
     411             :           (long) n, f0_n, f1_n, (f0_n <= f1_n) ? 1 : 0);
     412             :     }
     413             : }
     414             : #endif
     415             : 
     416             : static void
     417           1 : tune_list_mul_n ()
     418             : {
     419             :   size_t n;
     420           1 :   unsigned int __i, __k = 1, best[TUNE_LIST_MUL_N_MAX_SIZE];
     421             :   long __st;
     422             :   double st[4];
     423             : 
     424           1 :   ASSERT_ALWAYS (2 * TUNE_LIST_MUL_N_MAX_SIZE <= MAX_LEN);
     425             : 
     426           1 :   if (tune_verbose)
     427           1 :     printf ("Tuning list_mul_n\n");
     428           1 :   best[0] = 0;
     429          32 :   for (n = 1; n < TUNE_LIST_MUL_N_MAX_SIZE; n++)
     430             :     {
     431          31 :       if (tune_verbose)
     432          31 :         printf ("%"PRIu64":", n);
     433          31 :       __k = 1;
     434     7070130 :       TUNE_FUNC_LOOP(list_mul_n_basecase(z, x, y, n));
     435          31 :       st[0] = (double) __st / (double) __k;
     436          31 :       if (tune_verbose)
     437          31 :         printf (" basecase:%.2e", st[0]);
     438          31 :       best[n] = 0;
     439          31 :       __k = 1;
     440     7619014 :       TUNE_FUNC_LOOP(list_mul_n_karatsuba(z, x, y, n));
     441          31 :       st[1] = (double) __st / (double) __k;
     442          31 :       if (tune_verbose)
     443          31 :         printf (" karatsuba:%.2e", st[1]);
     444          31 :       if (st[1] < st[0])
     445          29 :         best[n] = 1;
     446          31 :       __k = 1;
     447     2744750 :       TUNE_FUNC_LOOP(list_mul_n_KS1(z, x, y, n));
     448          31 :       st[2] = (double) __st / (double) __k;
     449          31 :       if (tune_verbose)
     450          31 :         printf (" KS1:%.2e", st[2]);
     451          31 :       if (st[2] < st[best[n]])
     452           0 :         best[n] = 2;
     453          31 :       if (n >= 2)
     454             :         {
     455          30 :           __k = 1;
     456     1860002 :           TUNE_FUNC_LOOP(list_mul_n_KS2(z, x, y, n));
     457          30 :           st[3] = (double) __st / (double) __k;
     458          30 :           if (tune_verbose)
     459          30 :             printf (" KS2:%.2e", st[3]);
     460          30 :           if (st[3] < st[best[n]])
     461           0 :             best[n] = 3;
     462             :         }
     463          31 :       if (tune_verbose)     
     464          60 :         printf (" best:%s\n", (best[n] == 0) ? "basecase"
     465          29 :                 : (best[n] == 1) ? "kara"
     466          29 :                 : (best[n] == 2) ? "KS1" : "KS2");
     467             :     }
     468           1 :   printf ("#define LIST_MUL_TABLE {0");
     469          32 :   for (n = 1; n < TUNE_LIST_MUL_N_MAX_SIZE; n++)
     470          31 :     printf (",%u", best[n]);
     471           1 :   printf ("}\n");
     472           1 : }
     473             : 
     474             : int
     475           1 : main (int argc, char **argv)
     476             : {
     477             :   spv_size_t i;
     478             : 
     479           2 :   while (argc > 1)
     480             :     {
     481           1 :       if (strcmp (argv[1], "-v") == 0)
     482             :         {
     483           1 :           tune_verbose = 1;
     484           1 :           argc --;
     485           1 :           argv ++;
     486             :         }
     487           0 :       else if (argc > 2 && strcmp (argv[1], "-max_log2_len") == 0)
     488             :         {
     489           0 :           max_log2_len = atoi (argv[2]);
     490           0 :           if (max_log2_len < min_log2_len)
     491           0 :             max_log2_len = min_log2_len;
     492           0 :           argc -= 2;
     493           0 :           argv += 2;
     494             :         }
     495             :       else
     496             :         {
     497           0 :           fprintf (stderr, "Usage: tune [-v] [-max_log2_len nnn]\n");
     498           0 :           exit (1);
     499             :         }
     500             :     }
     501             :   
     502           1 :   gmp_randinit_default (gmp_randstate);
     503           1 :   mpz_init_set_str (M, M_str, 10);
     504             : 
     505           1 :   x = init_list (MAX_LEN);
     506           1 :   y = init_list (MAX_LEN);
     507           1 :   z = init_list (MAX_LEN);
     508           1 :   t = init_list (list_mul_mem (MAX_LEN / 2) + 3 * MAX_LEN / 2);
     509             :   
     510           1 :   mpzspm = mpzspm_init (MAX_LEN, M);
     511           1 :   ASSERT_ALWAYS (mpzspm != NULL);
     512           1 :   mpzspv = mpzspv_init (MAX_LEN, mpzspm);
     513           1 :   ASSERT_ALWAYS (mpzspv != NULL);
     514           1 :   mpzspv_random (mpzspv, 0, MAX_LEN, mpzspm);
     515             :   
     516      262145 :   for (i = 0; i < MAX_LEN; i++)
     517      262144 :     mpz_quick_random (x[i], M);
     518      262145 :   for (i = 0; i < MAX_LEN; i++)
     519      262144 :     mpz_quick_random (y[i], M);
     520      262145 :   for (i = 0; i < MAX_LEN; i++)
     521      262144 :     mpz_quick_random (z[i], M);    
     522             :   
     523           1 :   tune_list_mul_n ();
     524             :   
     525           1 :   spm = mpzspm->spm[0];
     526           1 :   spv = mpzspv[0];
     527             :   
     528           1 :   MPZMOD_THRESHOLD = crossover2 (tune_mpres_mul_modmuln, tune_mpres_mul_mpz,
     529             :       1, 512, 10);
     530             :   
     531           1 :   printf ("#define MPZMOD_THRESHOLD %lu\n", (unsigned long) MPZMOD_THRESHOLD);
     532             :   
     533           1 :   REDC_THRESHOLD = crossover2 (tune_mpres_mul_mpz, tune_mpres_mul_redc,
     534             :       MPZMOD_THRESHOLD, 512, 10);
     535             :   
     536           1 :   printf ("#define REDC_THRESHOLD %lu\n", (unsigned long) REDC_THRESHOLD);
     537             : 
     538           1 :   mpn_mul_lo_threshold[0] = 0;
     539           1 :   mpn_mul_lo_threshold[1] = 0;
     540             : 
     541           1 :   printf ("#define MPN_MUL_LO_THRESHOLD_TABLE {0, 0");
     542             : 
     543          31 :   for (mp_size = 2; mp_size < MPN_MUL_LO_THRESHOLD; mp_size++)
     544             :     {
     545          30 :       mpn_mul_lo_threshold[mp_size] = maximise (tune_ecm_mul_lo_n, 0, mp_size);
     546          30 :       printf (", %lu", (unsigned long) mpn_mul_lo_threshold[mp_size]);
     547          30 :       fflush (stdout);
     548             :     }
     549             : 
     550           1 :   printf ("}\n");
     551             :           
     552           1 :   NTT_GFP_TWIDDLE_DIF_BREAKOVER = maximise
     553             :       (tune_spv_ntt_gfp_dif, min_log2_len, max_log2_len);
     554             : 
     555           1 :   printf ("#define NTT_GFP_TWIDDLE_DIF_BREAKOVER %lu\n",
     556             :       (unsigned long) NTT_GFP_TWIDDLE_DIF_BREAKOVER);
     557             :    
     558           1 :   NTT_GFP_TWIDDLE_DIT_BREAKOVER = maximise
     559             :       (tune_spv_ntt_gfp_dit, min_log2_len, max_log2_len);
     560             : 
     561           1 :   printf ("#define NTT_GFP_TWIDDLE_DIT_BREAKOVER %lu\n",
     562             :       (unsigned long) NTT_GFP_TWIDDLE_DIT_BREAKOVER);
     563             :   
     564           1 :   MUL_NTT_THRESHOLD = 1 << crossover2 (tune_list_mul, tune_ntt_mul, 1,
     565             :       max_log2_len, 2);
     566             : 
     567           1 :   printf ("#define MUL_NTT_THRESHOLD %lu\n", (unsigned long) MUL_NTT_THRESHOLD);
     568             : 
     569           1 :   PREREVERTDIVISION_NTT_THRESHOLD = 1 << crossover2 (tune_PrerevertDivision,
     570             :       tune_ntt_PrerevertDivision, 1, max_log2_len, 2);
     571             : 
     572           1 :   printf ("#define PREREVERTDIVISION_NTT_THRESHOLD %lu\n",
     573             :       (unsigned long) PREREVERTDIVISION_NTT_THRESHOLD);
     574             : 
     575           1 :   POLYINVERT_NTT_THRESHOLD = 1 << crossover (tune_PolyInvert,
     576             :       tune_ntt_PolyInvert, 5, max_log2_len);
     577             : 
     578           1 :   printf ("#define POLYINVERT_NTT_THRESHOLD %lu\n", 
     579             :       (unsigned long) POLYINVERT_NTT_THRESHOLD);
     580             :   
     581           1 :   POLYEVALT_NTT_THRESHOLD = 1 << crossover (tune_polyevalT,
     582             :       tune_ntt_polyevalT, 5, max_log2_len);
     583             : 
     584           1 :   printf ("#define POLYEVALT_NTT_THRESHOLD %lu\n", 
     585             :       (unsigned long) POLYEVALT_NTT_THRESHOLD);
     586             :   
     587           2 :   MPZSPV_NORMALISE_STRIDE = 1 << maximise (tune_mpzspv_normalise,
     588           1 :       1, MAX_LOG2_MPZSPV_NORMALISE_STRIDE);
     589             :           
     590           1 :   printf ("#define MPZSPV_NORMALISE_STRIDE %lu\n", 
     591             :       (unsigned long) MPZSPV_NORMALISE_STRIDE);
     592             : 
     593           1 :   mpzspv_clear (mpzspv, mpzspm);
     594           1 :   mpzspm_clear (mpzspm);
     595             :   
     596           1 :   clear_list (x, MAX_LEN);
     597           1 :   clear_list (y, MAX_LEN);
     598           1 :   clear_list (z, MAX_LEN);
     599           1 :   clear_list (t, list_mul_mem (MAX_LEN / 2) + 3 * MAX_LEN / 2);
     600             : 
     601           1 :   mpz_clear (M);
     602             :   
     603           1 :   gmp_randclear (gmp_randstate);
     604             : 
     605           1 :   return 0;
     606             : }

Generated by: LCOV version 1.14