LCOV - code coverage report
Current view: top level - ecm - ecm_ntt.c (source / functions) Hit Total Coverage
Test: unnamed Lines: 163 174 93.7 %
Date: 2022-03-21 11:19:20 Functions: 6 6 100.0 %

          Line data    Source code
       1             : /* ecm_ntt.c - high level poly functions to interface between ecm and sp
       2             : 
       3             : Copyright 2005, 2006, 2007, 2008, 2009, 2011, 2012 Dave Newman,
       4             : Paul Zimmermann, Alexander Kruppa.
       5             : 
       6             : This file is part of the ECM Library.
       7             : 
       8             : The ECM Library is free software; you can redistribute it and/or modify
       9             : it under the terms of the GNU Lesser General Public License as published by
      10             : the Free Software Foundation; either version 3 of the License, or (at your
      11             : option) any later version.
      12             : 
      13             : The ECM Library is distributed in the hope that it will be useful, but
      14             : WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15             : or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16             : License for more details.
      17             : 
      18             : You should have received a copy of the GNU Lesser General Public License
      19             : along with the ECM Library; see the file COPYING.LIB.  If not, see
      20             : http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21             : 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22             : 
      23             : #include <stdio.h>
      24             : #include <stdlib.h>
      25             : #include <string.h>
      26             : #include "sp.h"
      27             : #include "ecm-impl.h"
      28             : 
      29             : #ifdef HAVE_UNISTD_H
      30             : #include <unistd.h> /* for unlink */
      31             : #endif
      32             : 
      33             : #define UNUSED 0
      34             : 
      35             : /* memory: 4 * len mpspv coeffs */
      36             : void
      37       21277 : ntt_mul (mpzv_t r, mpzv_t x, mpzv_t y, spv_size_t len, mpzv_t t,
      38             :     int monic, mpzspm_t mpzspm)
      39             : {
      40             :   mpzspv_t u, v;
      41             :         
      42       21277 :   if (len < MUL_NTT_THRESHOLD)
      43             :     {
      44        1366 :       list_mul (r, x, len, y, len, monic, t);
      45        1366 :       return;
      46             :     }
      47             : 
      48       19911 :   u = mpzspv_init (2 * len, mpzspm);
      49       19911 :   v = mpzspv_init (2 * len, mpzspm);
      50             :   
      51       19911 :   mpzspv_from_mpzv (v, 0, y, len, mpzspm);
      52       19911 :   mpzspv_from_mpzv (u, 0, x, len, mpzspm);
      53             : 
      54       19911 :   mpzspv_mul_ntt(u, 0, u, 0, len, v, 0, len, 2 * len, monic, 
      55             :     monic ? 2 * len : 0, mpzspm, 
      56             :     NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
      57       19911 :   mpzspv_to_mpzv (u, 0, r, 2 * len - 1 + monic, mpzspm);
      58             :   
      59       19911 :   mpzspv_clear (u, mpzspm);
      60       19911 :   mpzspv_clear (v, mpzspm);
      61             : }
      62             : 
      63             : /* memory: 2 * len mpzspv coeffs */
      64             : void
      65        3108 : ntt_PolyFromRoots (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t,
      66             :     mpzspm_t mpzspm)
      67             : {
      68             :   mpzspv_t x;
      69             :   spv_size_t i, m;
      70             :   
      71             :   ASSERT (len == ((spv_size_t)1) << ceil_log2 (len));
      72             : 
      73        3108 :   if (len <= MUL_NTT_THRESHOLD)
      74             :   {
      75        2479 :     PolyFromRoots (r, a, len, t, mpzspm->modulus);
      76        2479 :     return;
      77             :   }
      78             :   
      79         629 :   x = mpzspv_init (2 * len, mpzspm);
      80             :   
      81        6251 :   for (i = 0; i < len; i += MUL_NTT_THRESHOLD)
      82             :     {
      83        5622 :       PolyFromRoots (r, a + i, MUL_NTT_THRESHOLD, t, mpzspm->modulus);
      84        5622 :       mpzspv_from_mpzv (x, 2 * i, r, MUL_NTT_THRESHOLD, mpzspm);
      85             :     }
      86             :   
      87        1973 :   for (m = MUL_NTT_THRESHOLD; m < len; m *= 2)
      88             :     {
      89        6337 :       for (i = 0; i < 2 * len; i += 4 * m)
      90             :         {
      91        4993 :           mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm,
      92             :             NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
      93             :           
      94        4993 :           if (2 * m < len)
      95        4364 :             mpzspv_normalise (x, i, 2 * m, mpzspm);
      96             :         }         
      97             :     }
      98             :       
      99         629 :   mpzspv_to_mpzv (x, 0, r, len, mpzspm);
     100             : 
     101         629 :   mpzspv_clear (x, mpzspm);
     102             : }
     103             : 
     104             :   
     105             : /* memory: 2 * len mpzspv coeffs */
     106             : int
     107        1641 : ntt_PolyFromRoots_Tree (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t,
     108             :     int dolvl, mpzspm_t mpzspm, mpzv_t *Tree, FILE *TreeFile)
     109             : {
     110             :   mpzspv_t x;
     111             :   spv_size_t i, m, m_max;
     112             :   mpzv_t src;
     113        1641 :   mpzv_t *dst = Tree + ceil_log2 (len) - 1;
     114             : 
     115             :   ASSERT (len == ((spv_size_t)1) << ceil_log2 (len));
     116             :   
     117        1641 :   x = mpzspv_init (2 * len, mpzspm);
     118             :   
     119        1641 :   if (dolvl >= 0)
     120             :     {
     121         835 :       src = a;
     122         835 :       dst = &r;
     123             :     }
     124             :   else  
     125             :     {
     126             :       /* Copy the roots into the destination level of the tree (negating
     127             :          if so desired), set the source to this level (which now contains 
     128             :          the possibly negated roots), and advance the destination level 
     129             :          of the tree to the next level */
     130         806 :       src = *dst;
     131             :       /* we consider x + root[i], which means we consider negated roots */
     132         806 :       list_set (*dst--, a, len);
     133             :     }
     134             :   
     135        1641 :   m = (dolvl == -1) ? 1 : 1 << (ceil_log2 (len) - 1 - dolvl);
     136        1641 :   m_max = (dolvl == -1) ? len : 2 * m;
     137             :   
     138        7876 :   for (; m < m_max && m < MUL_NTT_THRESHOLD; m *= 2)
     139             :     {
     140             :       /* dst = &r anyway for dolvl != -1 */
     141        6235 :       if (m == len / 2)
     142         707 :         dst = &r;
     143             :       
     144        6235 :       if (TreeFile && list_out_raw (TreeFile, src, len) == ECM_ERROR)
     145             :         {
     146           0 :           outputf (OUTPUT_ERROR, "Error writing product tree of F\n");
     147           0 :           return ECM_ERROR;
     148             :         }
     149             : 
     150     1009444 :       for (i = 0; i < len; i += 2 * m)
     151     1003209 :         list_mul (t + i, src + i, m, src + i + m, m, 1, t + len);
     152             : 
     153        6235 :       list_mod (*dst, t, len, mpzspm->modulus);
     154             :       
     155        6235 :       src = *dst--;
     156             :     }
     157             :   
     158        2040 :   for (; m < m_max; m *= 2)
     159             :     {
     160             :       ASSERT (m > 1); /* This code does not do the sign change. Let's assume
     161             :                          MUL_NTT_THRESHOLD is always large enough that the
     162             :                          degree 1 product are done in the above loop */
     163             :       /* dst = &r anyway for dolvl != -1 */
     164         399 :       if (m == len / 2)
     165         213 :         dst = &r;
     166             :       
     167        3204 :       for (i = 0; i < 2 * len; i += 4 * m)
     168             :         {
     169        2949 :           if (TreeFile &&
     170         144 :               list_out_raw (TreeFile, src + i / 2, 2 * m) == ECM_ERROR)
     171           0 :             return ECM_ERROR;
     172             :           
     173        2805 :           mpzspv_from_mpzv (x, i, src + i / 2, m, mpzspm);
     174        2805 :           mpzspv_from_mpzv (x, i + 2 * m, src + i / 2 + m, m, mpzspm);
     175        2805 :           mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm,
     176             :             NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     177        2805 :           mpzspv_to_mpzv (x, i, *dst + i / 2, 2 * m, mpzspm);
     178             : 
     179             :           /* we only do the mod reduction to reduce the file size a bit */
     180        2805 :           if (TreeFile)
     181         144 :             list_mod (*dst + i / 2, *dst + i / 2, 2 * m, mpzspm->modulus);
     182             :         }
     183             :     
     184         399 :       src = *dst--;
     185             :     }
     186             : 
     187        1641 :   mpzspv_clear (x, mpzspm);
     188             : 
     189        1641 :   return 0;
     190             : }
     191             : 
     192             : 
     193             : /* 2 NTTs of size 2 * len
     194             :  * 2 NTTs of size len
     195             :  *
     196             :  * memory: 2 * len mpzspv coeffs */
     197             : void
     198       13085 : ntt_PrerevertDivision (mpzv_t a, mpzv_t b, mpzv_t invb, mpzspv_t sp_b,
     199             :     mpzspv_t sp_invb, spv_size_t len, mpzv_t t, mpzspm_t mpzspm)
     200             : {
     201             :   mpzspv_t x;
     202             :   
     203       13085 :   if (len < PREREVERTDIVISION_NTT_THRESHOLD)
     204             :     {
     205         673 :       PrerevertDivision (a, b, invb, len, t, mpzspm->modulus);
     206         673 :       return;
     207             :     }
     208             :   
     209       12412 :   x = mpzspv_init (2 * len, mpzspm);
     210             : 
     211             :   /* y = TOP (TOP (a) * invb) */
     212       12412 :   mpzspv_set_sp (x, 0, 0, len + 1, mpzspm);
     213       12412 :   mpzspv_from_mpzv (x, len + 1, a + len, len - 1, mpzspm);
     214       12412 :   mpzspv_mul_ntt (x, 0, x, 0, 2 * len, sp_invb, 0, UNUSED, 2 * len, 0, 0, mpzspm,
     215             :     NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     216       12412 :   mpzspv_normalise (x, 0, len, mpzspm);
     217             :   
     218       12412 :   mpzspv_mul_ntt (x, 0, x, 0, len, sp_b, 0, UNUSED, len, 0, 0, mpzspm, 
     219             :     NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     220       12412 :   mpzspv_to_mpzv (x, 0, t, len, mpzspm);
     221             :   
     222       12412 :   mpzspv_clear (x, mpzspm);
     223             :  
     224       12412 :   list_sub (t, t, a + len, len - 1);
     225       12412 :   list_sub (a, a, t, len);
     226             :   /* can we avoid this mod without risking overflow later? */
     227       12412 :   list_mod (a, a, len, mpzspm->modulus);
     228             : }
     229             : 
     230             : /* memory: 7/2 * len mpzspv coeffs */
     231         929 : void ntt_PolyInvert (mpzv_t q, mpzv_t b, spv_size_t len, mpzv_t t,
     232             :     mpzspm_t mpzspm)
     233             : {
     234         929 :   spv_size_t k = POLYINVERT_NTT_THRESHOLD / 2;
     235             :   mpzspv_t w, x, y, z;
     236             :   
     237         929 :   if (len < POLYINVERT_NTT_THRESHOLD)
     238             :     {
     239         615 :       PolyInvert (q, b, len, t, mpzspm->modulus);
     240         615 :       return;
     241             :     }
     242             : 
     243         314 :   PolyInvert (q + len - k, b + len - k, k, t, mpzspm->modulus);
     244             :   
     245         314 :   w = mpzspv_init (len / 2, mpzspm);
     246         314 :   x = mpzspv_init (len, mpzspm);
     247         314 :   y = mpzspv_init (len, mpzspm);
     248         314 :   z = mpzspv_init (len, mpzspm);
     249             :   
     250         314 :   mpzspv_from_mpzv (x, 0, q + len - k - 1, k + 1, mpzspm);
     251         314 :   mpzspv_from_mpzv (y, 0, b, len - 1, mpzspm);
     252             :   
     253         934 :   for (; k < len; k *= 2)
     254             :     {
     255         620 :       mpzspv_set (w, 0, x, 1, k, mpzspm);
     256         620 :       mpzspv_set (z, 0, y, len - 2 * k, 2 * k - 1, mpzspm);
     257         620 :       mpzspv_mul_ntt (z, 0, z, 0, 2 * k - 1, x, 0, k + 1, 2 * k, 0, 0, mpzspm, 
     258             :         NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     259         620 :       mpzspv_normalise (z, k, k, mpzspm);
     260         620 :       mpzspv_neg (z, 0, z, k, k, mpzspm);
     261             :       
     262         620 :       mpzspv_mul_ntt (x, 0, x, 0, 0, z, 0, k, 2 * k, 0, 0, mpzspm, 
     263             :         NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     264         620 :       if (2 * k < len)
     265         306 :         mpzspv_normalise (x, k, k, mpzspm);
     266         620 :       mpzspv_set (x, 1, x, k, k, mpzspm); /* legal overlap */
     267         620 :       mpzspv_set (x, k + 1, w, 0, MIN(k, len / 2 - 1), mpzspm);
     268             :     }
     269             : 
     270         314 :   mpzspv_to_mpzv (x, 1, q, len - POLYINVERT_NTT_THRESHOLD / 2, mpzspm);
     271             :  
     272             : #if defined DEBUG
     273             :   ntt_mul (t, q, b, len, NULL, 0, mpzspm);
     274             :   list_mod (t, t, 2 * len - 1, mpzspm->modulus);
     275             :   
     276             :   spv_size_t i;
     277             :   for (i = len - 1; i < 2 * len - 2; i++)
     278             :     if (mpz_cmp_ui (t[i], 0))
     279             :       printf ("error in ntt_PolyInvert\n");
     280             :   if (mpz_cmp_ui (t[2 * len - 2], 1))
     281             :     printf ("error in ntt_PolyInvert-\n");
     282             : #endif
     283             : 
     284         314 :   mpzspv_clear (w, mpzspm);
     285         314 :   mpzspv_clear (x, mpzspm);
     286         314 :   mpzspv_clear (y, mpzspm);
     287         314 :   mpzspv_clear (z, mpzspm);
     288             : }
     289             : 
     290             : 
     291             : /* memory: 4 * len mpzspv coeffs */
     292             : int
     293         916 : ntt_polyevalT (mpzv_t b, spv_size_t len, mpzv_t *Tree, mpzv_t T,
     294             :                    mpzspv_t sp_invF, mpzspm_t mpzspm, char *TreeFilenameStem)
     295             : {
     296             :   spv_size_t m, i;
     297         916 :   FILE *TreeFile = NULL;
     298             :   /* assume this "small" malloc will not fail in normal usage */
     299         916 :   char *TreeFilename = NULL;
     300         916 :   mpzv_t *Tree_orig = Tree;
     301         916 :   int level = 0; /* = ceil_log2 (len / m) - 1 */
     302         916 :   mpzspv_t x = mpzspv_init (2 * len, mpzspm);
     303         916 :   mpzspv_t y = mpzspv_init (2 * len, mpzspm);
     304             : 
     305         916 :   if (TreeFilenameStem)
     306             :     {
     307         113 :       TreeFilename = (char *) malloc (strlen (TreeFilenameStem) + 1 + 2 + 1);
     308         113 :       if (TreeFilename == NULL)
     309             :         {
     310           0 :           fprintf (stderr, "Cannot allocate memory in ntt_polyevalT\n");
     311           0 :           exit (1);
     312             :         }
     313             :     }
     314             :   
     315         916 :   mpzspv_from_mpzv (x, 0, b, len, mpzspm);
     316         916 :   mpzspv_mul_ntt(x, 0, x, 0, len, sp_invF, 0, UNUSED, 2 * len, 0, 0, mpzspm,
     317             :     NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     318         916 :   mpzspv_normalise (x, len - 1, len, mpzspm);
     319         916 :   mpzspv_set (y, 0, x, len - 1, len, mpzspm); /* y = high (b * invF) */
     320         916 :   mpzspv_reverse (y, 0, len, mpzspm); /* y = rev (high (b * invF)) */
     321             :     
     322        1685 :   for (m = len / 2; m >= POLYEVALT_NTT_THRESHOLD; m /= 2)
     323             :     {
     324         769 :       if (TreeFilenameStem)
     325             :         {
     326         140 :           Tree = &T;
     327             :           
     328         140 :           sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level);
     329             :           
     330         140 :           TreeFile = fopen (TreeFilename, "rb");
     331         140 :           if (TreeFile == NULL)
     332             :             {
     333           0 :               outputf (OUTPUT_ERROR,
     334             :                   "Error opening file %s for product tree of F\n",
     335             :                         TreeFilename);
     336           0 :               mpzspv_clear (x, mpzspm);
     337           0 :               mpzspv_clear (y, mpzspm);
     338           0 :               return ECM_ERROR;
     339             :             }
     340             : 
     341         140 :           list_inp_raw (*Tree, TreeFile, len);
     342             : 
     343         140 :           fclose (TreeFile);
     344         140 :           unlink (TreeFilename);
     345             :         }
     346             : 
     347        6690 :       for (i = 0; i < len; i += 2 * m)
     348             :         {
     349             :           
     350        5921 :           list_revert (*Tree + i, m);
     351        5921 :           mpzspv_set_sp (x, 0, 1, 1, mpzspm);
     352        5921 :           mpzspv_from_mpzv (x, 1, *Tree + i, m, mpzspm);
     353             :           /* x contains reversed monic poly */
     354        5921 :           mpzspv_mul_ntt (x, 0, x, 0, m + 1, y, i, 2 * m, 2 * m, 0, 0, mpzspm, 
     355             :             NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     356        5921 :           if (m > POLYEVALT_NTT_THRESHOLD)
     357        2766 :             mpzspv_normalise (x, m, m, mpzspm);
     358             :             
     359        5921 :           list_revert (*Tree + i + m, m);
     360        5921 :           mpzspv_set_sp (x, 2 * m, 1, 1, mpzspm);
     361        5921 :           mpzspv_from_mpzv (x, 2 * m + 1, *Tree + i + m, m, mpzspm);
     362        5921 :           mpzspv_mul_ntt(x, 2 * m, x, 2 * m, m + 1, y, i, UNUSED, 2 * m, 0, 0, mpzspm, 
     363             :             NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
     364        5921 :           if (m > POLYEVALT_NTT_THRESHOLD)
     365        2766 :             mpzspv_normalise (x, 3 * m, m, mpzspm);
     366             :           
     367        5921 :           mpzspv_set (y, i, x, 3 * m, m, mpzspm);
     368        5921 :           mpzspv_set (y, i + m, x, m, m, mpzspm);
     369             :         }
     370             :       
     371         769 :       Tree++;
     372         769 :       level++;
     373             :     }
     374             :     
     375         916 :   mpzspv_clear (x, mpzspm);
     376         916 :   mpzspv_to_mpzv (y, 0, T, len, mpzspm); /* T = rev (high (b * invF)) */
     377         916 :   mpzspv_clear (y, mpzspm);
     378     1159402 :   for (i = 0; i < len; i++)
     379     1158486 :     mpz_mod (T[i], T[i], mpzspm->modulus);
     380             : 
     381        6747 :   for (; m >= 1; m /= 2)
     382             :     {
     383        5831 :       if (TreeFilenameStem)
     384             :         {
     385         684 :           sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level);
     386             : 
     387         684 :           TreeFile = fopen (TreeFilename, "rb");
     388         684 :           if (TreeFile == NULL)
     389             :             {
     390           0 :               outputf (OUTPUT_ERROR,
     391             :                   "Error opening file %s for product tree of F\n",
     392             :                         TreeFilename);
     393           0 :               return ECM_ERROR;
     394             :             }
     395             :         }
     396             :       
     397        5831 :       TUpTree (T, Tree_orig, len, T + len, level++, 0,
     398        5831 :           mpzspm->modulus, TreeFile);
     399             : 
     400        5831 :       if (TreeFilenameStem)
     401             :         {
     402         684 :           fclose (TreeFile);
     403         684 :           unlink (TreeFilename);
     404             :         }
     405             :     }
     406             :   
     407         916 :   if (TreeFilenameStem)
     408         113 :     free (TreeFilename);
     409         916 :   list_swap (b, T, len);
     410         916 :   return 0;
     411             : }

Generated by: LCOV version 1.14