/* mpcx_tree high-level functions working with trees of polynomials Copyright (C) 2009, 2010, 2011, 2012, 2020 Andreas Enge This file is part of the MPFRCX Library. The MPFRCX Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The MPFRCX Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the MPFRCX library; see the file COPYING.LESSER. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "mpfrcx-impl.h" static void mpcx_evaltree (mpcx_tree_ptr t, mpcx_tree_srcptr subproducts, mpcx_srcptr f); static void mpcx_multieval_fast (mpc_t *values, mpc_t *args, int no, mpcx_srcptr f); /**************************************************************************/ void mpcx_tree_init (mpcx_tree_ptr t, int no, mpfr_prec_t prec) { /* initialises a binary tree with "no" leaves for storing polynomials */ /* of precision "prec" */ /* All tree nodes are allocated and initialised. */ int i, j, tmp; t->no = no; for (i = no, t->levels = 1; i > 1; i = (i+1) / 2, t->levels++); t->node = (mpcx_t **) malloc (t->levels * sizeof (mpcx_t *)); t->width = (int *) malloc (t->levels * sizeof (int)); for (i = 0, tmp = no; i < t->levels; i++, tmp = (tmp + 1) / 2) { t->width [i] = tmp; t->node [i] = (mpcx_t *) malloc (tmp * sizeof (mpcx_t)); for (j = 0; j < tmp; j++) mpcx_init (t->node [i][j], 2, prec); } } /**************************************************************************/ void mpcx_tree_clear (mpcx_tree_ptr t) { /* clears t and all the polynomials on its nodes */ int i, j; for (i = 0; i < t->levels; i++) { for (j = 0; j < t->width [i]; j++) mpcx_clear (t->node [i][j]); free (t->node [i]); } free (t->node); free (t->width); } /**************************************************************************/ void mpcx_tree_get_root (mpcx_ptr f, mpcx_tree_srcptr t) { /* assigns the root of t to f */ mpcx_set (f, t->node [t->levels-1][0]); } /**************************************************************************/ void mpcx_subproducttree (mpcx_tree_ptr t, mpcx_t *factors) { /* takes an array of t->no polynomials and returns via t the tree */ /* containing their subproducts: res [0,j] contains factors [j], and */ /* res [i,j] contains res [i-1,2j] * res [i-1,2j+1]. */ /* Optimal (at least if all degrees are the same) version as found out */ /* with Guillaume and Pierrick: always multiply two factors, sweeping */ /* the array from left to right */ /* t must be initialised with the correct number of leaves (as many as */ /* there are entries in "factors") and precision (usually the common */ /* precision of all entries in "factors"). */ int i, j; /* construct the basic layer */ for (j = 0; j < t->no; j++) mpcx_set (t->node [0][j], factors [j]); for (i = 1; i < t->levels; i++) { /* construct layer i */ for (j = 0; j < t->width [i-1] / 2; j++) mpcx_mul (t->node [i][j], t->node [i-1][2*j], t->node [i-1][2*j+1]); if (t->width [i-1] % 2 != 0) mpcx_set (t->node [i][t->width [i] - 1], t->node [i-1][t->width [i-1] - 1]); } } /**************************************************************************/ static void mpcx_evaltree (mpcx_tree_ptr t, mpcx_tree_srcptr subproducts, mpcx_srcptr f) { /* second step in multipoint evaluation; takes a subproduct tree with */ /* "no" entries and a polynomial f, and returns in t the tree of the */ /* f mod subproducts [i][j], computed at the precision of f */ /* t must be initialised with the same number of leaves as the */ /* subproduct tree. */ int i, j; mpcx_rem (t->node [subproducts->levels-1][0], f, subproducts->node [t->levels-1][0]); for (i = t->levels - 2; i >= 0; i--) for (j = 0; j < t->width [i]; j++) mpcx_rem (t->node [i][j], t->node [i+1][j/2], subproducts->node [i][j]); } /***************************************************************************/ static void mpcx_multieval_fast (mpc_t *values, mpc_t *args, int no, mpcx_srcptr f) { /* The internal version of the function. Always builds a subproduct */ /* tree independently of the degree of f and the number of arguments. */ mpcx_tree_t subproducts, evaltree; mpcx_t* factors; int i, j; factors = (mpcx_t *) malloc (no * sizeof (mpcx_t)); for (i = 0; i < no; i++) { mpcx_init (factors [i], 2, f->prec); factors [i]->deg = 1; mpc_set_ui (factors [i]->coeff [1], 1, MPC_RNDNN); mpc_neg (factors [i]->coeff [0], args [i], MPC_RNDNN); } mpcx_tree_init (subproducts, no, factors [0]->prec); mpcx_subproducttree (subproducts, factors); mpcx_tree_init (evaltree, no, factors [0]->prec); mpcx_evaltree (evaltree, subproducts, f); for (j = 0; j < no; j++) mpc_set (values [j], evaltree->node [0][j]->coeff [0], MPC_RNDNN); mpcx_tree_clear (subproducts); mpcx_tree_clear (evaltree); for (i = 0; i < no; i++) mpcx_clear (factors [i]); free (factors); } /**************************************************************************/ void mpcx_multieval (mpc_t *values, mpc_t *args, int no, mpcx_srcptr f) { /* evaluates the polynomial f in the "no" arguments args and returns */ /* the results via values. values must contain sufficient space, and */ /* each entry must be initialised. */ /* values and args may point to the same array. */ /* Works only if f and values have the same precision. */ /* Splits the arguments into portions of about half the degree of f */ /* and calls the fast multipoint evaluation routine on the chunks. */ int chunks, size; int i; if (f->deg <= 1) chunks = no; else if (f->deg > 2 * no) chunks = 1; else chunks = (2 * no) / f->deg; size = no / chunks; for (i = 0; i < chunks - 1; i++) mpcx_multieval_fast (values + i*size, args + i*size, size, f); mpcx_multieval_fast (values + (chunks - 1)*size, args + (chunks - 1)*size, no - (chunks - 1) * size, f); } /***************************************************************************/ void mpcx_hecke (mpcx_ptr rop, mpcx_tree_srcptr subproducts, mpcx_t *vals) { /* Let factors [0],... be the leaves of subproducts, no their number, */ /* and f=\prod_{i=0}^{no-1} factors [i] the polynomial at the root of */ /* subproducts. */ /* Returns \sum_{i=0}^{no-1} val [i]*f/factors [i], which is a */ /* simplified case of Lagrange interpolation and occurs when computing */ /* the Hecke representation of an algebraic number with conjugates */ /* val [i] in a number field defined by f. */ mpcx_t *hecke_old, *hecke_new = NULL; /* silence a spurious gcc warning */ /* two consecutive layers in the Hecke tree */ mpcx_t tmp; const int length = 2; mpfr_prec_t prec; int i, j; prec = subproducts->node [0][0]->prec; mpcx_init (tmp, length, prec); hecke_old = (mpcx_t *) malloc (subproducts->no * sizeof (mpcx_t)); for (j = 0; j < subproducts->no; j++) mpcx_init_set (hecke_old [j], vals [j]); for (i = 1; i < subproducts->levels; i++) { /* compute layer i */ hecke_new = (mpcx_t *) malloc (subproducts->width [i] * sizeof (mpcx_t)); for (j = 0; j < subproducts->width [i-1] / 2; j++) { mpcx_init (hecke_new [j], length, prec); mpcx_mul (hecke_new [j], hecke_old [2*j], subproducts->node [i-1][2*j+1]); mpcx_mul (tmp, hecke_old [2*j+1], subproducts->node [i-1][2*j]); mpcx_add (hecke_new [j], hecke_new [j], tmp); } if (subproducts->width [i-1] % 2 != 0) mpcx_init_set (hecke_new [subproducts->width [i] - 1], hecke_old [subproducts->width [i-1] - 1]); /* clear layer i-1 */ for (j = 0; j < subproducts->width [i-1]; j++) mpcx_clear (hecke_old [j]); free (hecke_old); /* swap */ hecke_old = hecke_new; /* One could save a few mallocs and frees and reuse entries of hecke_old, but this is probably not worth it. */ } mpcx_clear (tmp); mpcx_set (rop, hecke_old [0]); mpcx_clear (hecke_old [0]); free (hecke_old); } /***************************************************************************/ void mpcx_product_and_hecke (mpcx_t *rop, mpcx_t **vals, int no_pols, int no_factors) { /* vals is an array of no_pols arrays with no_factors entries. The */ /* functions uses vals [0] to compute a subproduct tree, the root of */ /* which is returned in rop [0]. Then the Hecke representation for the */ /* vals [i] is computed for i>=1 and returned via rop [i]. */ /* rop needs to have no_pols entries, all of which must be initialised. */ /* The function is written with as few nested loops as possible, so */ /* that it requires more index arithmetic, but is easier to parallelise */ /* via MPI. */ const int length = 2; const mpfr_prec_t prec = vals [0][0]->prec; int level = 0, width = no_factors, width_new, firsthalf; int l, m, i, j, j1, j2, j3; mpcx_t *new, *old; /* two consecutive layers of the trees */ mpcx_t tmp; mpcx_init (tmp, length, prec); old = (mpcx_t *) malloc (no_pols * width * sizeof (mpcx_t)); for (i = 0; i < no_pols; i++) for (j = 0; j < width; j++) mpcx_init_set (old [i*width + j], vals [i][j]); while (width > 1) { /* compute new layer */ level++; width_new = (width + 1) / 2; firsthalf = width / 2; /* initialise new layer */ new = (mpcx_t *) malloc (no_pols * width_new * sizeof (mpcx_t)); for (m = 0; m < no_pols * width_new; m++) mpcx_init (new [m], length, prec); /* carry out the multiplications */ for (l = 0; l < (2 * no_pols - 1) * firsthalf; l++) { i = ((l / firsthalf) + 1) / 2; j = l - (i == 0 ? 0 : (2*i-1) * firsthalf); if (j < firsthalf) { j3 = j; j1 = 2*j; j2 = j1+1; } else { j3 = j-firsthalf; j2 = 2*(j-firsthalf); j1 = j2+1; } mpcx_mul (tmp, old [0*width+j1], old [i*width+j2]); mpcx_add (new [i*width_new+j3], new [i*width_new+j3], tmp); /* some of these additions actually add to the zero polynomial, but this allows to un-nest the loops */ } /* copy odd (pun intended!) left-overs */ if (width % 2 != 0) for (i = 0; i < no_pols; i++) mpcx_set (new [(i+1)*width_new-1], old [(i+1)*width-1]); /* clear old layer */ for (m = 0; m < no_pols * width; m++) mpcx_clear (old [m]); free (old); /* swap */ old = new; width = width_new; } for (i = 0; i < no_pols; i++) { mpcx_set (rop [i], old [i]); mpcx_clear (old [i]); } free (old); mpcx_clear (tmp); } /***************************************************************************/ void mpcx_reconstruct (mpcx_ptr rop, mpcx_t *factors, int no) /* takes an array of no polynomials and multiplies them all together */ /* into rop */ { int i, layer = 1; mpcx_t *op; op = (mpcx_t *) malloc (no * sizeof (mpcx_t)); /* copy factors into op */ for (i = 0; i < no; i++) mpcx_init_set (op [i], factors [i]); while (no > 1) { for (i = 0; i < no / 2; i++) /* replace op [i] by op [2i] * op [2i+1] */ mpcx_mul (op [i], op [2*i], op [2*i+1]); if (no % 2 != 0) mpcx_swap (op [no / 2], op [no - 1]); for (i = (no + 1) / 2; i < no; i++) mpcx_clear (op [i]); no = (no + 1) / 2; layer++; } /* The result is now in op [0]. */ mpcx_set (rop, op [0]); mpcx_clear (op [0]); free (op); } /***************************************************************************/