/* mpfrx_mul computes h = f * g Copyright (C) 2009, 2011, 2012, 2013 Andreas Enge This file is part of the MPFRCX Library. The MPFRCX Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The MPFRCX Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the MPFRCX library; see the file COPYING.LESSER. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "mpfrcx-impl.h" static void mpfrx_array_mul_karatsuba (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n, const int offm, const int offn, mpfr_t* buff); static void mpfrx_array_mul_toomcook (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n, const int offm, const int offn, mpfr_t* buff); static void mpfrx_array_mul (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n); /**************************************************************************/ void mpfrx_mul (mpfrx_ptr h, mpfrx_srcptr f, mpfrx_srcptr g) { int overlap; mpfrx_t h_local; int f_monic, g_monic, i; if (f->deg == -1 || g->deg == -1) { h->deg = -1; return; } f_monic = (mpfr_cmp_si (f->coeff [f->deg], 1) == 0); g_monic = (mpfr_cmp_si (g->coeff [g->deg], 1) == 0); if (f_monic && f->deg == 0) { mpfrx_set (h, g); return; } if (g_monic && g->deg == 0) { mpfrx_set (h, f); return; } overlap = (h == f) || (h == g); if (overlap) mpfrx_init (h_local, f->deg + g->deg + 1, h->prec); else mpfrx_mv (h_local, h); h_local->deg = f->deg + g->deg; if (h_local->size < h_local->deg + 1) mpfrx_realloc (h_local, h_local->deg + 1); if (f_monic && g_monic) { mpfrx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg); /* watch out: the coefficient of X^{f->deg+g->deg-1} has not been set */ for (i = 0; i < f->deg - 1; i++) mpfr_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg], f->coeff [i], GMP_RNDN); mpfr_set (h_local->coeff [f->deg + g->deg - 1], f->coeff [f->deg - 1], GMP_RNDN); for (i = 0; i < g->deg; i++) mpfr_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg], g->coeff [i], GMP_RNDN); mpfr_set_ui (h_local->coeff [h_local->deg], 1, GMP_RNDN); } else if (f_monic) { mpfrx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg, g->deg+1); for (i = 0; i < g->deg; i++) mpfr_add (h_local->coeff [i + f->deg], h_local->coeff [i + f->deg], g->coeff [i], GMP_RNDN); mpfr_set (h_local->coeff [f->deg + g->deg], g->coeff [g->deg], GMP_RNDN); } else if (g_monic) { mpfrx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg); for (i = 0; i < f->deg; i++) mpfr_add (h_local->coeff [i + g->deg], h_local->coeff [i + g->deg], f->coeff [i], GMP_RNDN); mpfr_set (h_local->coeff [f->deg + g->deg], f->coeff [f->deg], GMP_RNDN); } else mpfrx_array_mul (h_local->coeff, f->coeff, g->coeff, f->deg+1, g->deg+1); if (overlap) mpfrx_clear (h); mpfrx_mv (h, h_local); } /**************************************************************************/ /* */ /* internal multiplication functions working directly on arrays of */ /* coefficients; together with the array, the number of coefficients is */ /* passed to the functions, and possibly an offset, indicating that the */ /* coefficient of degree i of the polynomial is in fact stored at the */ /* position i*offset. This facilitates the comb-like decomposition of */ /* polynomials during Karatsuba and Toom-Cook multiplication. */ /* */ /**************************************************************************/ static void mpfrx_array_mul_karatsuba (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n, const int offm, const int offn, mpfr_t* buff) { /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */ /* represented by an array of coefficients. The coefficient of degree */ /* i of f is stored at the index i*offm, similarly for g. h is */ /* replaced by f times g, computed by Karatsuba multiplication, and */ /* using an offset of 1. In practice by recursion, the offsets will */ /* always be powers of 2. */ /* buff must point to a temporary space of m+n+1 coefficients, which */ /* means 2 coefficients more than the size of the result. */ /* h must be different from f and g and contain sufficiently much */ /* space to hold all the coefficients. */ int i; mpfr_t *f_local, *g_local, *h_local, *buff_local, *tmp; int m_local, n_local; if (m == 1) for (i = 0; i < n; i++) { mpfr_mul (h [0], g [0], f [0], GMP_RNDN); h++; g += offn; } else if (n == 1) for (i = 0; i < m; i++) { mpfr_mul (h [0], f [0], g [0], GMP_RNDN); h++; f += offm; } else { /* recursion */ /* Write f = f_0 (X^2) + X f_1 (X^2), g = g_0 (X^2) + X g_1 (X^2). */ /* copy f_0 + f_1 and g_0 + g_1 into the buffer */ buff_local = buff; tmp = f; f_local = buff_local; m_local = m/2; for (i = 0; i < m_local; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offm; mpfr_add (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offm; buff_local++; } if (m % 2 != 0) { m_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } tmp = g; g_local = buff_local; n_local = n/2; for (i = 0; i < n_local; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offn; mpfr_add (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offn; buff_local++; } if (n % 2 != 0) { n_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } /* multiply them together into the following piece of the buffer, */ /* using the result as buffer */ mpfrx_array_mul_karatsuba (buff_local, f_local, g_local, m_local, n_local, 1, 1, h); /* buff_local contains ((f_0 + f_1)*(g_0 + g1)) (X^2); this must b */ /* copied into h, multiplied by X */ tmp = h + 1; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_set (*tmp, *buff_local, GMP_RNDN); tmp += 2; buff_local++; } /* multiply f0 and g0 into the beginning of the buffer, using the */ /* remaining piece of buffer */ /* m_local and n_local remain as before. */ mpfrx_array_mul_karatsuba (buff, f, g, m_local, n_local, 2*offm, 2*offn, buff + m_local + n_local - 1); /* buff contains (f_0*g_0) (X^2); this must be put into h; and */ /* subtracted, multiplied by X */ tmp = h; h_local = buff; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_set (*tmp, *h_local, GMP_RNDN); tmp++; mpfr_sub (*tmp, *tmp, *h_local, GMP_RNDN); tmp++; h_local++; } /* set the remaining even coefficient to 0 */ if (m % 2 == 0 && n % 2 == 0) mpfr_set_ui (h [2 * (m_local + n_local - 1)], 0, GMP_RNDN); /* multiply f1 and g1 into the beginning of the buffer, using the */ /* remaining piece of buffer */ m_local = m/2; n_local = n/2; mpfrx_array_mul_karatsuba (buff, f+offm, g+offn, m_local, n_local, 2*offm, 2*offn, buff + m_local + n_local - 1); /* buff contains (f_1*g_1) (X^2); this must be added to h, */ /* multiplied by X^2; and subtracted, multiplied by X */ tmp = h + 1; h_local = buff; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_sub (*tmp, *tmp, *h_local, GMP_RNDN); tmp++; mpfr_add (*tmp, *tmp, *h_local, GMP_RNDN); tmp++; h_local++; } } } /**************************************************************************/ static void mpfrx_array_mul_toomcook (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n, const int offm, const int offn, mpfr_t* buff) { /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */ /* represented by an array of coefficients. The coefficient of degree */ /* i of f is stored at the index i*offm, similarly for g. h is */ /* replaced by f times g, computed by Toom-Cook 3-way multiplication, */ /* and using an offset of 1. In practice by recursion, the offsets */ /* will always be powers of 2. */ /* buff must point to a sufficiently large temporary space; */ /* m + n + 117 coefficients are enough for m, n < 2^64. */ /* h must be different from f and g and contain sufficiently much */ /* space to hold all the coefficients. */ int i; mpfr_t *f_local, *g_local, *h_local, *buff_local, *tmp, tmpfr; int m_local, n_local, min1, min2, min3; if (m == 1) for (i = 0; i < n; i++) { mpfr_mul (h [0], g [0], f [0], GMP_RNDN); h++; g += offn; } else if (n == 1) for (i = 0; i < m; i++) { mpfr_mul (h [0], f [0], g [0], GMP_RNDN); h++; f += offm; } else if (m == 2 || n == 2 || m == 4 || n == 4) mpfrx_array_mul_karatsuba (h, f, g, m, n, offm, offn, buff); else { /* recursion */ mpfr_init2 (tmpfr, mpfr_get_prec (h [0])); /* Write f = f_0 (X^3) + f_1 (X^3) X + f_2 (X^3) X^2, and likewise g */ /* Copy f_0 - f_1 + f_2 and g_0 - g_1 + g_2 into the buffer */ buff_local = buff; tmp = f; f_local = buff_local; m_local = m/3; for (i = 0; i < m_local; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offm; mpfr_sub (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offm; mpfr_add (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offm; buff_local++; } if (m % 3 == 1) { m_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } else if (m % 3 == 2) { m_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp+= offm; mpfr_sub (*buff_local, *buff_local, *tmp, GMP_RNDN); buff_local++; } tmp = g; g_local = buff_local; n_local = n/3; for (i = 0; i < n_local; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offn; mpfr_sub (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offn; mpfr_add (*buff_local, *buff_local, *tmp, GMP_RNDN); tmp += offn; buff_local++; } if (n % 3 == 1) { n_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } else if (n % 3 == 2) { n_local++; mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp+= offn; mpfr_sub (*buff_local, *buff_local, *tmp, GMP_RNDN); buff_local++; } /* multiply them together into the following piece of the buffer, */ /* using the remaining piece of buffer */ h_local = buff_local; buff_local += m_local + n_local - 1; mpfrx_array_mul_toomcook (h_local, f_local, g_local, m_local, n_local, 1, 1, buff_local); /* compute mini, the minimal value such that X*(X^3)^i still occurs */ /* in the result */ min1 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 3) / 3 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 3) / 3); min2 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 4) / 3 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 4) / 3); min3 = ( (m+2) / 3 + (n+2) / 3 - 2 < (m + n - 5) / 3 ? (m+2) / 3 + (n+2) / 3 - 2 : (m + n - 5) / 3); /* h_local contains the value in -1; it must be multiplied by */ /* -1/3 X + 1/2 X^2 - 1/6 X^3 and put into the result. */ mpfr_set_ui (h [0], 0, GMP_RNDN); mpfr_set_ui (h [m+n-2], 0, GMP_RNDN); tmp = h + 2; buff_local = h_local; for (i = 0; i <= min2; i++) { mpfr_div_2ui (*tmp, *buff_local, 1, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 1; buff_local = h_local; for (i = 0; i <= min1; i++) { mpfr_div_ui (*tmp, *buff_local, 3, GMP_RNDN); mpfr_neg (*tmp, *tmp, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 3; for (i = 0; i <= min3; i++) { mpfr_div_2ui (*tmp, *(tmp-2), 1, GMP_RNDN); tmp += 3; } /* copy f_0 + f_1 + f_2 and g_0 + g_1 + g_2 into the buffer, by */ /* adding 2*f_1 and 2*g_1 to what is already there */ buff_local = f_local; tmp = f + offm; for (i = 0; i < (m+1) / 3; i++) { mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += 3*offm; buff_local++; } buff_local = g_local; tmp = g + offm; for (i = 0; i < (n+1) / 3; i++) { mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += 3*offm; buff_local++; } /* multiply them together into the following piece of the buffer, */ /* using the remaining piece of buffer */ h_local = buff + m_local + n_local; buff_local = h_local + m_local + n_local - 1; mpfrx_array_mul_toomcook (h_local, f_local, g_local, m_local, n_local, 1, 1, buff_local); /* h_local contains the value in 1; it must be multiplied by */ /* X + 1/2 X^2 - 1/2 X^3 and added to the result. */ tmp = h + 1; buff_local = h_local; for (i = 0; i <= min1; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* divide h_local by 2 in place */ buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_div_2ui (*buff_local, *buff_local, 1, GMP_RNDN); buff_local++; } tmp = h + 2; buff_local = h_local; for (i = 0; i <= min2; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 3; buff_local = h_local; for (i = 0; i <= min3; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* copy f_0 + 2*f_1 + 4*f_2 and g_0 + 2*g_1 + 4*g_2 into the buffer */ buff_local = buff; tmp = f; for (i = 0; i < m/3; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offm; mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += offm; mpfr_mul_2ui (tmpfr, *tmp, 2, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += offm; buff_local++; } if (m % 3 == 1) { mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } else if (m % 3 == 2) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp+= offm; mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); buff_local++; } tmp = g; for (i = 0; i < n/3; i++) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp += offn; mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += offn; mpfr_mul_2ui (tmpfr, *tmp, 2, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); tmp += offn; buff_local++; } if (n % 3 == 1) { mpfr_set (*buff_local, *tmp, GMP_RNDN); buff_local++; } else if (n % 3 == 2) { mpfr_set (*buff_local, *tmp, GMP_RNDN); tmp+= offn; mpfr_mul_2ui (tmpfr, *tmp, 1, GMP_RNDN); mpfr_add (*buff_local, *buff_local, tmpfr, GMP_RNDN); buff_local++; } /* multiply them together into the following piece of the buffer, */ /* using the remaining piece of buffer */ h_local = buff_local; buff_local += m_local + n_local - 1; mpfrx_array_mul_toomcook (h_local, f_local, g_local, m_local, n_local, 1, 1, buff_local); /* h_local contains the value in 2; it must be multiplied by */ /* -1/6 X + 1/6 X^3 and added to the result. */ /* divide h_local by 6 in place */ buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_div_2ui (*buff_local, *buff_local, 1, GMP_RNDN); mpfr_div_ui (*buff_local, *buff_local, 3, GMP_RNDN); buff_local++; } tmp = h + 1; buff_local = h_local; for (i = 0; i <= min1; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 3; buff_local = h_local; for (i = 0; i <= min3; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* multiply f0 and g0 into the start of the buffer, using the */ /* remaining buffer piece */ h_local = buff; buff_local = buff + m_local + n_local - 1; mpfrx_array_mul_toomcook (h_local, f, g, m_local, n_local, 3*offm, 3*offn, buff_local); /* h_local contains the value in 0; it must be multiplied by */ /* 1 - 1/2 X - X^2 + 1/2 X^3 and added to the result. */ tmp = h; buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 2; buff_local = h_local; for (i = 0; i <= min2; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* divide h_local by 2 in place */ buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_div_2ui (*buff_local, *buff_local, 1, GMP_RNDN); buff_local++; } tmp = h + 1; buff_local = h_local; for (i = 0; i <= min1; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 3; buff_local = h_local; for (i = 0; i <= min3; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* multiply f2 and g2 into the start of the buffer, using the */ /* remaining buffer piece */ /* Instead of ceil (m/3) and ceil (n/3), these now have floor (m/3) */ /* and floor (n/3) coefficients. */ m_local = m/3; n_local = n/3; h_local = buff; buff_local = buff + m_local + n_local - 1; mpfrx_array_mul_toomcook (h_local, f+2*offm, g+2*offn, m_local, n_local, 3*offm, 3*offn, buff_local); /* h_local contains the value in infinity; it must be multiplied by */ /* 2 X - X^2 - 2 X^3 + X^4 and added to the result. */ /* Here, even X^4*h_local does not exceed the result. */ tmp = h + 4; buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 2; buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } /* multiply h_local by 2 in place */ buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_mul_2ui (*buff_local, *buff_local, 1, GMP_RNDN); buff_local++; } tmp = h + 1; buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_add (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } tmp = h + 3; buff_local = h_local; for (i = 0; i < m_local + n_local - 1; i++) { mpfr_sub (*tmp, *tmp, *buff_local, GMP_RNDN); tmp += 3; buff_local++; } mpfr_clear (tmpfr); } } /**************************************************************************/ static void mpfrx_array_mul (mpfr_t* h, mpfr_t* f, mpfr_t* g, const int m, const int n) /* f is a polynomial of degree m-1, g a polynomial of degree n-1, */ /* represented by an array of coefficients. h is replaced by f times g.*/ /* h is expected to be different from f and g and must contain */ /* sufficiently much space to hold all the coefficients. */ { const int min = (m < n ? m : n), max = (m < n ? n : m); if (min >= MPFRX_FFT_THRESHOLD && min < MPFRX_NOFFT_THRESHOLD) if (min != max) { /* Check whether splitting the larger polynomial into smaller pieces is useful. We target an FFT of order the smallest power of 2 above 2*min. */ int length = 1, pieces; while (length < min) length *= 2; length *= 2; length = length + 1 - min; pieces = (max + length - 1) / length; length = (max + pieces - 1) / pieces; if (pieces > 1) { int i, j; mpfr_t *minpol = (m < n ? f : g); mpfr_t *maxpol = (m < n ? g : f); int current = length; mpfrx_t buffer; int bufsize = min + length - 1; mpfrx_init (buffer, bufsize, mpfr_get_prec (h [0])); /* clear out the result */ for (i = 0; i < min + max - 1; i++) mpfr_set_ui (h [i], 0, GMP_RNDN); /* multiply minpol by pieces of maxpol and add to result */ for (i = 0; i < pieces; i++) { /* The last piece may be shorter than length. */ if (i == pieces - 1) current = max - (pieces - 1) * length; mpfrx_array_mul_fft (buffer->coeff, minpol, maxpol + i * length, min, current); for (j = 0; j < min + current - 1; j++) mpfr_add (h [i * length + j], h [i * length + j], buffer->coeff [j], GMP_RNDN); } mpfrx_clear (buffer); } else mpfrx_array_mul_fft (h, f, g, m, n); } else mpfrx_array_mul_fft (h, f, g, m, n); else { mpfrx_t buffer; int bufsize; /* For Karatsuba, we would need m + n + 1 coefficients. */ /* mpfrx_init (buffer, m + n + 1, mpfr_get_prec (h [0]->re)); */ /* For Toom-Cook, we may need up to m + n + 3 floor (min (log_3 (m-1), log_3 (n-1)) - 1). */ bufsize = m + n + (min <= 82 ? 9 : (min <= 1162261468 ? 53 : 117)); mpfrx_init (buffer, bufsize, mpfr_get_prec (h [0])); mpfrx_array_mul_toomcook (h, f, g, m, n, 1, 1, buffer->coeff); mpfrx_clear (buffer); } }