/*
mpfrx_fft

functions for the real fft and multiplication using it

Copyright (C) 2009, 2010, 2012 Andreas Enge

This file is part of the MPFRCX Library.

The MPFRCX Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The MPFRCX Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPFRCX library; see the file COPYING.LESSER.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.
*/

#include "mpfrcx-impl.h"

/**************************************************************************/

static void mpfrx_dft_twoinone (mpc_t *F, mpc_t *G, mpfr_t *f, mpfr_t *g,
   int m1, int m2, int n, mpfr_t *twiddle, int direction) {
   /* computes simultaneously the two DFTs of order n of f and g and      */
   /* stores them in F and G; only the coefficients 0, ..., n/2 are kept. */
   /* The parameters zeta, twiddle and direction are the same as for      */
   /* mpcx_dftrb and are simply passed through.                           */
   /* n must be a power of 2, f must have exactly m1 and g exactly m2     */
   /* coefficients (with m1, m2 <= n), and F and G must have at least     */
   /* n/2 + 1 coefficients.                                               */
   /* Numerical problems if f and g have different magnitudes! We lose as */
   /* many digits as the difference in magnitude.                         */
   /* Makes only sense if all precisions are the same.                    */

   int      i;
   mpfr_exp_t f_min, f_max, g_min, g_max, delta_exp;
   mpc_t    *h;

   /* copy f + i g into h */
   h = (mpc_t *) malloc (n * sizeof (mpc_t));
   for (i = 0; i < n; i++)
      mpc_init2 (h [i], mpc_get_prec (F [0]));
   for (i = 0; i < m1; i++)
      mpfr_set (h [i]->re, f [i], GMP_RNDN);
   for (i = m1; i < n; i++)
      mpfr_set_ui (h [i]->re, 0, GMP_RNDN);
   for (i = 0; i < m2; i++)
      mpfr_set (h [i]->im, g [i], GMP_RNDN);
   for (i = m2; i < n; i++)
      mpfr_set_ui (h [i]->im, 0, GMP_RNDN);

   /* If f and g have different magnitudes, we lose as many digits as the */
   /* difference. Renormalise f to be close to g. As criterion, we choose */
   /* the maximal exponent of the coefficients.                           */
   f_min = f [0]->_mpfr_exp;
   f_max = f_min;
   for (i = 1; i < m1; i++)
      if (f [i]->_mpfr_exp < f_min)
         f_min = f [i]->_mpfr_exp;
      else if (f [i]->_mpfr_exp > f_max)
         f_max = f [i]->_mpfr_exp;
   g_min = g [0]->_mpfr_exp;
   g_max = g_min;
   for (i = 1; i < m2; i++)
      if (g [i]->_mpfr_exp < g_min)
         g_min = g [i]->_mpfr_exp;
      else if (g [i]->_mpfr_exp > g_max)
         g_max = g [i]->_mpfr_exp;
   delta_exp = g_max - f_max;
   if (delta_exp >= 0)
      for (i = 0; i < m1; i++)
         mpfr_mul_2ui (h [i]->re, h [i]->re, delta_exp, GMP_RNDN);
   else
      for (i = 0; i < m1; i++)
         mpfr_div_2ui (h [i]->re, h [i]->re, -delta_exp, GMP_RNDN);

   mpcx_dftrb (h, n, twiddle, 1, direction);

   mpfr_set (F [0]->re, h [0]->re, GMP_RNDN);
   mpfr_set_ui (F [0]->im, 0, GMP_RNDN);
   mpfr_set (G [0]->re, h [0]->im, GMP_RNDN);
   mpfr_set_ui (G [0]->im, 0, GMP_RNDN);
   for (i = 1; i < n/2; i++) {
      mpc_conj (F [i], h [n-i], MPC_RNDNN);
      mpc_add (F [i], F [i], h [i], MPC_RNDNN);
      mpc_div_2ui (F [i], F [i], 1, MPC_RNDNN);
      mpc_conj (G [i], h [n-i], MPC_RNDNN);
      mpc_sub (G [i], G [i], h [i], MPC_RNDNN);
      mpc_div_2ui (G [i], G [i], 1, MPC_RNDNN);
      mpc_mul_i (G [i], G [i], 1, MPC_RNDNN);
   }
   mpfr_set (F [n/2]->re, h [n/2]->re, GMP_RNDN);
   mpfr_set_ui (F [n/2]->im, 0, GMP_RNDN);
   mpfr_set (G [n/2]->re, h [n/2]->im, GMP_RNDN);
   mpfr_set_ui (G [n/2]->im, 0, GMP_RNDN);

   /* Correct F to invert the scaling. */
   if (delta_exp >= 0)
      for (i = 0; i <= n/2; i++)
         mpc_div_2ui (F [i], F [i], delta_exp, MPC_RNDNN);
   else
      for (i = 0; i <= n/2; i++)
         mpc_mul_2ui (F [i], F [i], -delta_exp, MPC_RNDNN);

   for (i = 0; i < n; i++)
      mpc_clear (h [i]);
   free (h);
}


/**************************************************************************/

static void mpfrx_dft_inv (mpfr_t *f, mpc_t *F, int m, int n, mpc_t *zeta,
                    mpfr_t *twiddle)
   /* computes n times the inverse DFT with a real sequence as result     */
   /* Only the first m coefficients are kept in f, with m <= n.           */
   /* The parameters zeta, twiddle and direction are the same as for      */
   /* mpcx_dftrb and are passed through.                                  */
   /* n must be a power of 2, f must have exactly m coefficients, and at  */
   /* least the n/2 + 1 first coefficients of F must be set.              */

{
   int i;
   mpc_t *G, tmp, tmp2;

   mpc_init2 (tmp, mpfr_get_prec (f [0]));
   mpc_init2 (tmp2, mpfr_get_prec (f [0]));

   /* prepare the auxiliary sequence G of length n/2 */
   G = (mpc_t *) malloc (n/2 * sizeof (mpc_t));
   mpc_init2 (G [0], mpfr_get_prec (f [0]));
   mpc_sub (G [0], F [0], F [n/2], MPC_RNDNN);
   mpc_mul_i (G [0], G [0], 1, MPC_RNDNN);
   mpc_add (G [0], G [0], F [0], MPC_RNDNN);
   mpc_add (G [0], G [0], F [n/2], MPC_RNDNN);
   for (i = 1; i < n/4; i++) {
      mpc_init2 (G [i], mpfr_get_prec (f [0]));
      mpc_conj (tmp, F [n/2 - i], MPC_RNDNN);
      mpc_sub (G [i], F [i], tmp, MPC_RNDNN);
      mpc_mul (G [i], G [i], zeta [n/4-i], MPC_RNDNN);
      mpc_add (G [i], G [i], F [i], MPC_RNDNN);
      mpc_add (G [i], G [i], tmp, MPC_RNDNN);
   }
   mpc_init2 (G [n/4], mpfr_get_prec (f [0]));
   mpc_mul_2ui (G [n/4], F [n/4], 1, MPC_RNDNN);
   for (i = n/4 + 1; i < n/2; i++) {
      mpc_init2 (G [i], mpfr_get_prec (f [0]));
      mpc_conj (tmp, F [n/2 - i], MPC_RNDNN);
      mpc_sub (G [i], F [i], tmp, MPC_RNDNN);
      mpc_conj (tmp2, zeta [i-n/4], MPC_RNDNN);
      mpc_mul (G [i], G [i], tmp2, MPC_RNDNN);
      mpc_add (G [i], G [i], F [i], MPC_RNDNN);
      mpc_add (G [i], G [i], tmp, MPC_RNDNN);
   }

   mpcx_dftrb (G, n/2, twiddle, 2, -1);

   /* sort out the real sequence */
   for (i = 0; i < m/2; i++) {
      mpfr_set (f [2*i], G [i]->re, GMP_RNDN);
      mpfr_set (f [2*i+1], G [i]->im, GMP_RNDN);
   }
   if (m % 2 != 0)
      mpfr_set (f [m-1], G [m/2]->re, GMP_RNDN);

   mpc_clear (tmp);
   mpc_clear (tmp2);
   for (i = 0; i < n/2; i++)
      mpc_clear (G [i]);
   free (G);
}

/**************************************************************************/

void mpfrx_array_mul_fft (mpfr_t *h, mpfr_t *f, mpfr_t *g, int m, int n)
   /* Replace h by the product of f and g, using the FFT.                 */
   /* f must have m, g must have n coefficients, and h must have          */
   /* sufficiently many coefficients to hold the result.                  */

{
   int    i;
   mpc_t  *zeta;
   mpfr_t *twiddle;
   mpc_t  *F, *G;
      /* holds the FFTs of f and g; that of h is also stored in F */
   int    N;
      /* order of the FFT */

   N = 4;
      /* necessary because we break the FFTs down to the level of fourth  */
      /* roots of unity                                                   */
   while (N < m + n - 1)
      N *= 2;

   mpcx_twiddle_init (&twiddle, N, mpfr_get_prec (h [0]));
   F = (mpc_t *) malloc ((N/2 + 1) * sizeof (mpc_t));
   G = (mpc_t *) malloc ((N/2 + 1) * sizeof (mpc_t));
   for (i = 0; i <= N/2; i++)
   {
      mpc_init2 (F [i], mpfr_get_prec (h [0]));
      mpc_init2 (G [i], mpfr_get_prec (h [0]));
   }

   /* Perform the FFTs. */
   mpfrx_dft_twoinone (F, G, f, g, m, n, N, twiddle, 1);
   /* Perform the pointwise multiplication and the inverse                */
   /* transformation.                                                     */
   for (i = 0; i <= N/2; i++)
      mpc_mul (F [i], F [i], G [i], MPC_RNDNN);
   /* free G as early as possible */
   for (i = 0; i <= N/2; i++)
      mpc_clear (G [i]);
   mpcx_zeta_init (&zeta, N, mpfr_get_prec (h [0]));
   mpfrx_dft_inv (h, F, m + n - 1, N, zeta, twiddle);
   for (i = 0; i < m + n - 1; i++)
      mpfr_div_ui (h [i], h [i], N, GMP_RNDN);

   mpcx_zeta_clear (zeta, N);
   mpcx_twiddle_clear (twiddle, N);
   for (i = 0; i <= N/2; i++)
      mpc_clear (F [i]);
   free (F);
   free (G);
}

/**************************************************************************/