/* computing ufp(x) */

#define CHECK /* to check both algorithms return the same results */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#include <math.h>
#include <fpu_control.h>

static void
set_fpu_prec (void)
{
  fpu_control_t cw;

  _FPU_GETCW(cw);
  cw &= ~(_FPU_EXTENDED|_FPU_DOUBLE|_FPU_SINGLE);
  cw |= _FPU_DOUBLE;
  _FPU_SETCW(cw);
}
#define N 100000000

/* Algorithm 5 from the paper */
double
algo5 (double x)
{
  double phi = 4503599627370497.0;
  double psi = 0.99999999999999988897769753748;
  double q = phi * x;
  double r = psi * q;
  return q - r;
}

/* Algorithm 6 from the paper */
double
algo6 (double x)
{
  double psi = 0.99999999999999988897769753748;
  double a = psi * x;
  return x - a;
}

/* Algorithm 7 from the paper */
double
algo7 (double x)
{
  double psi = 9007199254740991.0;
  double a = psi * x;
  return 9007199254740992.0 * x - a;
}

/* Algorithm 8 from the paper */
double
algo8 (double x)
{
  double psi = 1.6653345369377348106354475021e-16;
  double a = x + psi * x;
  // double a = fma (psi, x, x);
  return a - x;
}

/* Algorithm 9 from the paper */
double
algo9 (double x)
{
  double psi = 1.1102230246251567869426645497e-16;
  double eta = 4.9406564584124654417656879287e-324;
  double e = psi * x + eta;
  double xsup = x + e;
  return xsup - x;
}

/* same using bit manipulations */
double
bitman (double x)
{
  union { double d; unsigned long n; } z;
  double xh;
  z.d = x;
  z.n &= ~0xfffffffffffffUL; /* zero the low 52 bits */
  return z.d;
}

int
main()
{
  double *x;
  clock_t c;

  set_fpu_prec ();

  x = malloc (N * sizeof (double));

  /* fill the table with random floats */
  for (int i = 0; i < N; i++)
    {
      /* warning: drand48() only generates the high 48 bits! */
      x[i] = drand48 () + (lrand48 () % 32) / 9007199254740992.0;
      x[i] = ldexp (x[i], 32);
    }

  double xh, xl, xh2, xl2;
#ifdef CHECK
  /* check all algorithms give the same value */
  for (int i = 0; i < N; i++)
    {
      xh = algo5 (x[i]);
      xh2 = bitman (x[i]);
      if (xh != xh2)
        {
          printf ("Error for x=%.16e\n", x[i]);
          printf ("algo5 gives ufp=%.16e\n", xh);
          printf ("bitman gives ufp=%.16e\n", xh2);
          exit (1);
        }
      xh2 = algo6 (x[i]);
      if (xh != xh2 * 4503599627370496.0)
        {
          printf ("Error for x=%.16e\n", x[i]);
          printf ("algo5 gives ufp=%.16e\n", xh);
          printf ("algo6 gives ulp=%.16e\n", xh2);
          exit (1);
        }
      xh2 = algo7 (x[i]);
      if (2.0 * xh != xh2)
        {
          printf ("Error for x=%.16e\n", x[i]);
          printf ("algo5 gives ufp=%.16e\n", xh);
          printf ("algo7 gives ufp=%.16e\n", xh2);
          exit (1);
        }
      xh2 = algo8 (x[i]);
      if (xh != xh2 * 4503599627370496.0)
        {
          printf ("Error for x=%.16e\n", x[i]);
          printf ("algo5 gives ufp=%.16e\n", xh);
          printf ("algo8 gives ufp=%.16e\n", xh2);
          exit (1);
        }
      xh2 = algo9 (x[i]);
      if (xh != xh2 * 4503599627370496.0)
        {
          printf ("Error for x=%.16e\n", x[i]);
          printf ("algo5 gives ufp=%.16e\n", xh);
          printf ("algo9 gives ufp=%.16e\n", xh2);
          exit (1);
        }
    }
#endif

  double s = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    s += algo5 (x[i]);
  printf ("Algo5:  s=%e time=%e\n",
          s, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  double t = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    t += bitman (x[i]);
  printf ("bitman: t=%e time=%e\n",
          t, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  double u = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    u += algo6 (x[i]);
  printf ("Algo6:  u=%e time=%e\n",
          u, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  double v = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    v += algo7 (x[i]);
  printf ("Algo7:  v=%e time=%e\n",
          v, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  double w = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    w += algo8 (x[i]);
  printf ("Algo8:  w=%e time=%e\n",
          w, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  w = 0;
  c = clock ();
  for (int i = 0; i < N; i++)
    w += algo9 (x[i]);
  printf ("Algo9:  w=%e time=%e\n",
          w, (double) (clock () - c) / (double) CLOCKS_PER_SEC);

  free (x);
  
  return 0;
}
