/*! \file
    \brief Handling quotient matrices [[q, 1], [1, 0]]
*/

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "gcd_utils.h"
#include "gcd_matrix.h"
#include "qseq.h"
#include "regular.h"
#include "gcd_common.h"

#define DEBUG_REGULAR 0
#define TIMINGS 0

/* Initialise R.
   - R->lq is set up with qseq_init, using lqn when initlq == 1 and 0
     otherwise;
   - R->Q is heap-allocated and set to the identity via
     hgcd_matrix_init_set_identity(R->Q, Qn);
   - R->det starts at 1 and the updateQ flag is recorded.
   The process is aborted if the matrix header cannot be allocated.
   The storage is released by regular_clear. */
void regular_init(regular_t R, mp_size_t lqn, mp_size_t Qn, int initlq,
		  int updateQ)
{
    qseq_init(R->lq, (initlq == 1 ? lqn : 0));
    R->Q = (struct hgcd_matrix *)malloc(sizeof(struct hgcd_matrix));
    if(R->Q == NULL){
	/* the initialiser below would dereference a NULL pointer */
	fprintf(stderr, "regular_init: out of memory\n");
	abort();
    }
    hgcd_matrix_init_set_identity(R->Q, Qn);
    R->det = 1;
    R->updateQ = updateQ;
}

/* Release what regular_init allocated: the quotient sequence (only when it
   is in use) and the matrix R->Q together with its heap-allocated header. */
void regular_clear(regular_t R)
{
    const int lq_in_use = (qseq_is_used(R->lq) != 0);

    if(lq_in_use){
	qseq_clear(R->lq);
    }
    hgcd_matrix_clear(R->Q);
    free(R->Q);
}

/* Dump R on stdout: first the matrix R->Q, then the quotient sequence. */
void regular_print(regular_t R)
{
    fputs("R->Q:=", stdout);
    hgcd_matrix_print(R->Q);
    fputs(";\n", stdout);

    fputs("lq:=", stdout);
    qseq_print(R->lq);
}

/* Sanity check on the entries of R->Q = [[p, q], [r, s]].
   A regular matrix should satisfy p >= max(q, r) >= min(q, r) >= s and
   p > s.  The identity matrix is accepted as is.
   Returns 1 when all tests pass; otherwise prints the first violated
   relation on stdout and returns 0. */
int regular_are_norms_correct(regular_t R)
{
    mp_ptr p, q, r, s;
    mp_size_t n;

    if(hgcd_matrix_is_identity(R->Q))
	return 1; /* humf */

    p = R->Q->p[0][0];
    q = R->Q->p[0][1];
    r = R->Q->p[1][0];
    s = R->Q->p[1][1];
    n = R->Q->n;

    /* comparisons with p */
    if(mpn_cmp(p, q, n) < 0){
	printf("p < q\n");
	return 0;
    }
    if(mpn_cmp(p, r, n) < 0){
	printf("p < r\n");
	return 0;
    }
    if(mpn_cmp(p, s, n) <= 0){
	printf("p <= s\n");
	printf("p:="); MPN_PRINT(R->Q->p[0][0], R->Q->n); printf(";\n");
	printf("s:="); MPN_PRINT(R->Q->p[1][1], R->Q->n); printf(";\n");
	return 0;
    }
    /* comparisons with s */
    if(mpn_cmp(q, s, n) < 0){
	printf("q < s\n");
	return 0;
    }
    if(mpn_cmp(r, s, n) < 0){
	printf("r < s\n");
	printf("r:="); MPN_PRINT(R->Q->p[1][0], R->Q->n); printf(";\n");
	printf("s:="); MPN_PRINT(R->Q->p[1][1], R->Q->n); printf(";\n");
	return 0;
    }
    return 1;
}

/* Test x_i * ||Q_i|| <= m, where the top-left entry R->Q->p[0][0] (on
   R->Q->n limbs) stands for ||Q_i||.
   (xi, xin) and (m, mn) are limb arrays.  The product is normalized and
   compared with m by size first, so mn is expected to be the normalized
   size of m -- TODO confirm against callers.
   Returns 1 when the inequality holds and 0 otherwise; aborts when the
   temporary product cannot be allocated. */
int regular_check_inequality(regular_t R, mp_ptr xi, mp_size_t xin,
			     mp_ptr m, mp_size_t mn)
{
    mp_size_t prodn = xin + R->Q->n;
    mp_ptr prod = (mp_ptr)malloc(prodn * sizeof(mp_limb_t));
    int ok;

    if(prod == NULL){
	fprintf(stderr, "regular_check_inequality: out of memory\n");
	abort();
    }
    /* mpn_mul wants the longer operand first */
    if(xin >= R->Q->n)
	mpn_mul(prod, xi, xin, R->Q->p[0][0], R->Q->n);
    else
	mpn_mul(prod, R->Q->p[0][0], R->Q->n, xi, xin);
    MPN_NORMALIZE(prod, prodn);
    ok = (prodn < mn || (prodn == mn && mpn_cmp(prod, m, mn) <= 0));
    free(prod);
    return ok;
}

/* TODO: should disappear */
/* Multiply R->Q by the quotient stored in cell i of R->lq, provided
   R->updateQ is set.  The index R->lq->tind[i] encodes the cell:
   0 means nothing to do, a positive value is the quotient itself (one limb),
   a negative value -j points at offset j of tab_large, with size
   tab_large_nl[j]. */
void regular_mul_qi(regular_t R, int i, mp_ptr tp, mp_size_t tp_alloc)
{
    /* FIXME: special case A = Id */
    const int cell = R->lq->tind[i];
    mp_limb_t lq = 0;
    mp_ptr q;
    mp_size_t qn;

    if(cell == 0)
	return;
    if(cell < 0){
	/* large quotient stored out of line */
	q  = R->lq->tab_large + (-cell);
	qn = R->lq->tab_large_nl[-cell];
    }
    else{
	/* small quotient stored in the index itself */
	lq = (mp_limb_t)cell;
	q  = &lq;
	qn = 1;
    }
#if DEBUG_REGULAR >= 1
    printf("MAG K22:=KMatrixSpace(QQ, 2, 2);\n");
    printf("MAG Aorig:=K22!"); hgcd_matrix_print(R->Q);
    printf(";\nMAG q:=");
    if(lq != 0)
	printf("%lu", lq);
    else
	MPN_PRINT(q, qn);
    printf(";\n");
#endif
    if(R->updateQ){
	hgcd_matrix_mul_q(R->Q, q, qn, tp, tp_alloc);
    }
}

/** Undo one right-multiplication of R->Q by the quotient matrix
    [[q, 1], [1, 0]].

    INPUT: q > 0 as the limb array (q, qn); A = R->Q = [[a, b], [c, d]];
    tp is scratch space of tp_alloc limbs, used only when qn > 1.
    PRECONDITION: tp_alloc >= M->n + qn + 1 (the multi-limb branch asserts
    bn + qn <= tp_alloc, bn being the normalized size of b resp. d).
    SIDE-EFFECT: A /= [[q, 1], [1, 0]] => [[b, a-b*q], [d, c-d*q]]
    Each row is updated in place (first entry -= q * second entry) and the
    two entry pointers of the row are then swapped; R->Q->n is finally
    normalized with respect to the new top-left entry.
    Rem. special code for the 41% case with q = 1.

    FIXME: Rarely used vs. mul_q?

    By hypothesis, the inverse operation was done, hence there is no
    underflow possible. The borrows stored in carry[] are never inspected.

 */
void regular_div_q(regular_t R, mp_ptr q, mp_size_t qn,
		   mp_ptr tp, mp_size_t tp_alloc)
{
    mp_limb_t carry[2]; /* borrows of the one-limb paths: written, never read */
    int ii;             /* row index: 0 for (a, b), 1 for (c, d) */

#if DEBUG_REGULAR >= 2
    printf("R:="); hgcd_matrix_print(R->Q); printf(";\n");
    printf("R->Q->n = %ld\n", R->Q->n);
    printf("MAG K22:=KMatrixSpace(QQ, 2, 2);\n");
    printf("MAG Aorig:=K22!"); hgcd_matrix_print(R->Q); printf(";\n");
    printf("MAG q:="); MPN_PRINT(q, qn); printf(";\n");
#endif
    if(qn == 1){
	if(q[0] == (mp_limb_t)1){
	    /* very frequent: A <- [[b, a-b], [d, c-d]] */
	    if(R->Q->n == 1)
		/* one-limb entries: two-limb subtraction (0, a) - (0, b),
		   low limb back into the entry, high limb into carry[ii] */
		for(ii = 0; ii < 2; ii++)
		    sub_ddmmss(carry[ii], R->Q->p[ii][0][0],
			       0, R->Q->p[ii][0][0], 0, R->Q->p[ii][1][0]);
	    else
		/* multi-limb entries: plain subtraction, borrow discarded */
		for(ii = 0; ii < 2; ii++)
		    mpn_sub_n(R->Q->p[ii][0], R->Q->p[ii][0], R->Q->p[ii][1],
			      R->Q->n);
	}
	else{
	    /* one-limb q != 1: first entry -= q[0] * second entry */
	    for(ii = 0; ii < 2; ii++)
		carry[ii] = mpn_submul_1(R->Q->p[ii][0], R->Q->p[ii][1],
					 R->Q->n, q[0]);
	}
    }
    else{
	/* rare case of qn > 1 */
	mp_ptr a, b;
	mp_size_t an, bn, tpn;
	
	for(ii = 0; ii < 2; ii++){
	    a = R->Q->p[ii][0];
	    b = R->Q->p[ii][1];
	    /* work on normalized sizes of both entries of the row */
	    an = R->Q->n;
	    MPN_NORMALIZE(a, an);
	    bn = R->Q->n;
	    MPN_NORMALIZE(b, bn);
	    if(bn != 0){
		/* tp <- q*b on bn+qn limbs; nothing to subtract when b = 0 */
		tpn = bn+qn;
		assert(tpn <= tp_alloc);
		tp[tpn-1] = 0;
		/* mpn_mul wants the longer operand first */
		if(bn >= qn)
		    mpn_mul(tp, b, bn, q, qn);
		else
		    mpn_mul(tp, q, qn, b, bn);
		MPN_NORMALIZE(tp, tpn);
		/* a -= q*b */
		mpn_sub(a, a, an, tp, tpn);
	    }
	}
    }
    for(ii = 0; ii < 2; ii++)
	/* swap (a-b*q, b) */
	MP_PTR_SWAP(R->Q->p[ii][1], R->Q->p[ii][0]);
#if DEBUG_REGULAR >= 2
    printf("MAG Aq:=K22!"); hgcd_matrix_print(R->Q);
    printf(";\nMAG IsZero(MyMul(Aorig, Matrix([[0, 1], [1, -q]]))-Aq);\n");
#endif
    /* after the swap p[0][0] holds the former b: shrink R->Q->n to its
       normalized size */
    MPN_NORMALIZE(R->Q->p[0][0], R->Q->n);
    //DET R->det = - R->det;
}

/* Divide R->Q by the quotient stored in cell i of R->lq, using
   regular_div_q.  The index R->lq->tind[i] encodes the cell: 0 means nothing
   to do, a positive value is the one-limb quotient itself, a negative value
   -j points at offset j of tab_large, with size tab_large_nl[j]. */
void regular_div_qi(regular_t R, int i, mp_ptr tp, mp_size_t tp_alloc)
{
    const int cell = R->lq->tind[i];
    mp_limb_t lq = 0;
    mp_ptr q;
    mp_size_t qn;

    if(cell == 0)
	return;
    if(cell < 0){
	/* large quotient stored out of line */
	q  = R->lq->tab_large + (-cell);
	qn = R->lq->tab_large_nl[-cell];
    }
    else{
	/* small quotient stored in the index itself */
	lq = cell;
	q  = &lq;
	qn = 1;
    }
#if DEBUG_REGULAR >= 2
    printf("MAG K22:=KMatrixSpace(QQ, 2, 2);\n");
    printf("MAG Aorig:=K22!"); hgcd_matrix_print(R->Q); printf(";\n");
    if(lq != 0)
	printf("MAG q:=%lu;\n", lq);
    else{
	printf("MAG q:="); MPN_PRINT(q, qn); printf(";\n");
    }
#endif
    regular_div_q(R, q, qn, tp, tp_alloc);
}

/* Perform R o* q: append (q, qn) to R->lq when the sequence is in use and
   multiply R->Q by q, tp being scratch space of tp_alloc limbs. */
void regular_omul_q(regular_t R, mp_ptr q, mp_size_t qn,
		    mp_ptr tp, mp_size_t tp_alloc)
{
    const int track_lq = (qseq_is_used(R->lq) != 0);

    if(track_lq){
	qseq_add_last_mpn(R->lq, q, qn);
    }
    hgcd_matrix_mul_q(R->Q, q, qn, tp, tp_alloc);
    ASSERT(R->det == qseq_determinant(R->lq));
}

/* R <- R omul S: combine the quotient sequences with qseq_omul, then
   R->Q *= S->Q.  With TIMINGS >= 1 both steps are timed on stderr. */
void regular_omul(regular_t R, regular_t S)
{
#if TIMINGS >= 1
    double t0 = runtime();

    qseq_omul(R->lq, S->lq);
    fprintf(stderr, "qseq_omul: cards=%d %lf\n", qseq_card(S->lq), runtime()-t0);
    t0 = runtime();
    /* R->Q *= S->Q */
    hgcd_matrix_mul(R->Q, S->Q);
    fprintf(stderr, "mat_omul: S->Q->n=%ld %lf\n", S->Q->n, runtime()-t0);
#else
    qseq_omul(R->lq, S->lq);
    /* R->Q *= S->Q */
    hgcd_matrix_mul(R->Q, S->Q);
#endif
}

/* Perform R o/ lastq: divide R->Q by the last quotient of R->lq and drop
   that quotient.  Only meaningful when R->lq is in use; anything else trips
   an assertion. */
void regular_oslash(regular_t R, mp_ptr tp, mp_size_t tp_alloc)
{
    if(qseq_is_used(R->lq) == 0){
	assert(0);
	return;
    }
    regular_div_qi(R, R->lq->last-1, tp, tp_alloc);
    qseq_remove_last(R->lq);
}

/* One backward step on the remainder pair:
       (xi, xip1) <- (xip1 + xi*q, xi),
   i.e. xim1 = q*xi + xip1 is rebuilt in tp and the pair is shifted.
   Sizes are updated through xin and xip1n.  The code relies on
   *xin >= *xip1n; tp_alloc is not consulted. */
static void oslash3_aux(mp_ptr xi, mp_size_t *xin,
			mp_ptr xip1, mp_size_t *xip1n,
			mp_ptr q, mp_size_t qn,
			mp_ptr tp, mp_size_t tp_alloc)
{
    const mp_size_t hin = *xin;    /* size of xi on entry */
    const mp_size_t lon = *xip1n;  /* size of xip1 on entry */
    mp_size_t accn = hin;

    /* tp <- xip1, zero-padded to the size of xi */
    MPN_COPY(tp, xip1, lon);
    MPN_ZERO(tp + lon, hin - lon);
    /* tp <- xip1+xi*qi == xim1 */
    gcd_addmul(tp, &accn, xi, hin, q, qn);
    /* xip1 <- xi */
    MPN_COPY(xip1, xi, hin);
    *xip1n = hin;
    /* xi <- xim1 */
    MPN_COPY(xi, tp, accn);
    *xin = accn;
}

/* Easy case, R->lq is in use: fetch the last quotient, divide R->Q by it,
   step the pair (xi, xip1) backwards and remove the quotient from R->lq.
   The buffer returned by mpn_qseq_get_last is freed here. */
static void oslash3_lq(regular_t R, mp_ptr xi, mp_size_t *xin,
		       mp_ptr xip1, mp_size_t *xip1n,
		       mp_ptr tp, mp_size_t tp_alloc)
{
    mp_size_t last_qn;
    mp_ptr last_q;

#if DEBUG_REGULAR > 0
    printf("qi:="); qseq_print_cell(R->lq, R->lq->last-1); printf(";\n");
#endif
    /* R o/ lastq */
    last_q = mpn_qseq_get_last(&last_qn, R->lq);
    regular_div_q(R, last_q, last_qn, tp, tp_alloc);
    oslash3_aux(xi, xin, xip1, xip1n, last_q, last_qn, tp, tp_alloc);
    qseq_remove_last(R->lq);
    free(last_q);
}

/* Backward step when the quotient sequence is not stored: the last quotient
   is recovered from R->Q = [[a, b], [c, d]] itself.
   Usual case (d > 1): each row (first entry, second entry) goes through
   euclidean_div_rem; the entry pointers of the row are swapped and the
   remainder is copied into the second entry, which turns R->Q into the
   previous matrix.  The quotient left in (q, qn) by the second row is then
   used to step (xi, xip1) backwards with oslash3_aux.
   Case d <= 1: starts with assert(0), so the code after it only runs when
   assertions are compiled out.
   NOTE(review): the two malloc results are used without a NULL check. */
static void oslash3_mat(regular_t R, mp_ptr xi, mp_size_t *xin,
			mp_ptr xip1, mp_size_t *xip1n,
			mp_ptr tp, mp_size_t tp_alloc)
{
    mp_ptr q = NULL, r;
    mp_size_t qn, rn;
    mp_size_t tabn[2][2]; /* normalized sizes of the four entries */
    int i, j;

#if DEBUG_REGULAR > 0
    printf("Q:="); hgcd_matrix_print(R->Q); printf("\n");
#endif
    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++){
	    tabn[i][j] = R->Q->n;
	    MPN_NORMALIZE(R->Q->p[i][j], tabn[i][j]);
	}
    /* d > 1 ? */
    if(tabn[1][1] > 1 || (tabn[1][1] == 1 && R->Q->p[1][1][0] > 1)){
	/* d is large, usual case */
	/* aim2:=ai mod aim1; // aim2 == bim1
	   cim2:=ci mod cim1; // cim2 == dim1 */
	/* ai = qi * aim1 + aim2: we do Euclid on (ai, aim1) */
	q = (mp_ptr)malloc((R->Q->n+1) * sizeof(mp_limb_t));
	r = (mp_ptr)malloc((R->Q->n+1) * sizeof(mp_limb_t));
	for(i = 0; i < 2; i++){
	    /* compute aim2 or cim2 */
	    /* presumably q receives the quotient, r the remainder and the
	       return value is the quotient size -- TODO confirm */
	    qn = euclidean_div_rem(q, r, R->Q->p[i][0], tabn[i][0],
				   R->Q->p[i][1], tabn[i][1], tp, tp_alloc);
	    /* the old second entry becomes the first one */
	    MP_PTR_SWAP(R->Q->p[i][1], R->Q->p[i][0]);
	    /* the remainder takes at most as many limbs as the divisor */
	    rn = tabn[i][1];
	    MPN_NORMALIZE(r, rn);
	    /* second entry <- remainder, zero-padded to R->Q->n limbs */
	    MPN_COPY(R->Q->p[i][1], r, rn);
	    MPN_ZERO(R->Q->p[i][1]+rn, R->Q->n-rn);
	}
	/* shrink R->Q->n to the normalized size of the new top-left entry */
	MPN_NORMALIZE(R->Q->p[0][0], R->Q->n);
	/* (q, qn) is the quotient obtained for the second row (i = 1) */
	oslash3_aux(xi, xin, xip1, xip1n, q, qn, tp, tp_alloc);
	free(q);
	free(r);
    }
    else{
	/* everything below is reached only when assert is disabled */
	assert(0);
	/* d <= 1 */
	if(tabn[1][1] == 0){
	    /* d = 0 => Q = [[q, 1], [1, 0]] => Qprec = identity */
	    printf("Q = Q1\n");
	    /* aim1:=1; aim2:=0;
	       cim1:=0; cim2:=1; 
	       lastq = R->Q->p[0][0];
	    */
	    oslash3_aux(xi, xin, xip1, xip1n, R->Q->p[0][0], R->Q->n,
			tp, tp_alloc);
	    /* set to identity */
	    MPN_ZERO(R->Q->p[0][0], R->Q->n);
	    R->Q->p[0][0][0] = 1;
	    R->Q->p[0][1][0] = 0;
	    R->Q->p[1][0][0] = 0;
	    R->Q->p[1][1][0] = 1;
	    R->Q->n = 1;
	}
	else{
	    /* this could be di = d3 or d2 only, since d1 = 0 */
	    /* could we have a very big aim1 == bi? */
	    /* NOTE(review): this branch never calls oslash3_aux, so
	       (xi, xip1) is left untouched here */
	    mp_size_t ain = R->Q->n, bin = R->Q->n, cin = R->Q->n, tpn;
	    MPN_NORMALIZE(R->Q->p[0][0], ain);
	    MPN_NORMALIZE(R->Q->p[0][1], bin);
	    MPN_NORMALIZE(R->Q->p[1][0], cin);
	    /* tp <- bi*ci, to be compared with ai */
	    tpn = bin+cin;
	    if(bin >= cin)
		mpn_mul(tp, R->Q->p[0][1], bin, R->Q->p[1][0], cin);
	    else
		mpn_mul(tp, R->Q->p[1][0], cin, R->Q->p[0][1], bin);
	    MPN_NORMALIZE(tp, tpn);
	    if(ain > tpn
	       || (ain == tpn && mpn_cmp(R->Q->p[0][0],tp,tpn) > 0)){
		/* ai > bi*ci => ai = bi*ci+1
		   Q = Q2 => Qprev = Q1 = [[bi, 1], [1, 0]]
		*/
		MP_PTR_SWAP(R->Q->p[0][1], R->Q->p[0][0]);
		MPN_ZERO(R->Q->p[0][1], R->Q->n);
		R->Q->p[0][1][0] = 1;
		MPN_ZERO(R->Q->p[1][0], R->Q->n);
		R->Q->p[1][0][0] = 1;
		MPN_ZERO(R->Q->p[1][1], R->Q->n);
		MPN_NORMALIZE(R->Q->p[0][0], R->Q->n);
	    }
	    else{
		/* ai <= bi*ci => ai = bi*ci-1 
		   Q = Q3 => Qprev = Q2 = [[bi, bi-1], [1, 1]]
		*/
		MP_PTR_SWAP(R->Q->p[0][1], R->Q->p[0][0]);
		/* compute bi-1 */
		mpn_sub_1(R->Q->p[0][1], R->Q->p[0][1], bin, (mp_limb_t)1);
		MPN_ZERO(R->Q->p[1][0], R->Q->n);
		R->Q->p[1][0][0] = 1;
		MPN_ZERO(R->Q->p[1][1], R->Q->n);
		R->Q->p[1][1][0] = 1;
		R->Q->n = bin;
	    }
	}
    }
}

/* PRECONDITION: xi > xip1
   Perform (xi, xip1, R) o/ R, using
       xim1 = qi * xi + xip1
       R->Q = Qi = [[ai, bi], [ci, di]] = [[ai, aim1], [ci, cim1]]
       Qim1 = Qprev:=Matrix([[aim1, aim2], [cim1, cim2]]);
   SIDE-EFFECT: Qim1 replaces Qi in R->Q and (xi, xip1) is stepped backwards.
   The last quotient comes from R->lq when the sequence is in use and is
   recomputed from the matrix otherwise.
*/
void regular_oslash3(regular_t R, mp_ptr xi, mp_size_t *xin,
		     mp_ptr xip1, mp_size_t *xip1n,
		     mp_ptr tp, mp_size_t tp_alloc)
{
#if DEBUG_REGULAR > 0
    printf("xi:="); MPN_PRINT(xi, *xin); printf(";\n");
    printf("xip1:="); MPN_PRINT(xip1, *xip1n); printf(";\n");
    assert(*xin >= *xip1n);
#endif
    if(!qseq_is_used(R->lq)){
	oslash3_mat(R, xi, xin, xip1, xip1n, tp, tp_alloc);
    }
    else{
	oslash3_lq(R, xi, xin, xip1, xip1n, tp, tp_alloc);
    }
#if DEBUG_REGULAR > 0
    printf("Qp:="); hgcd_matrix_print(R->Q); printf("\n");
#endif
}

/* Reset the matrix R->Q to the identity; R->lq and R->det are not touched. */
void regular_set_identity(regular_t R)
{
    struct hgcd_matrix *mat = R->Q;

    hgcd_matrix_set_identity(mat);
}

/* Append the quotient (q, qn) to R: it is pushed onto R->lq when the
   sequence is in use, and R->Q is always multiplied by q, tp being scratch
   space of tp_alloc limbs. */
void regular_add_last_mpn(regular_t R, mp_ptr q, mp_size_t qn,
			  mp_ptr tp, mp_size_t tp_alloc)
{
    int track_lq;

#if DEBUG_REGULAR >= 1
    printf("add_last: q:="); MPN_PRINT(q, qn); printf(";\n");
#endif
    track_lq = (qseq_is_used(R->lq) != 0);
    if(track_lq){
	qseq_add_last_mpn(R->lq, q, qn);
    }
    /* update R->Q */
    hgcd_matrix_mul_q(R->Q, q, qn, tp, tp_alloc);
#if DEBUG_REGULAR >= 1
    check_Q_from_lq(R->Q, R->lq);
#endif
    // DET R->det = -R->det;
}

/* TODO: improve this -> R is identity perhaps? */
/* Emptiness of R is decided on the quotient sequence alone: returns
   whatever qseq_is_empty reports for R->lq. */
int regular_is_empty(regular_t R)
{
    int empty;

#if DEBUG_REGULAR >= 1
    printf("R->lq="); qseq_print(R->lq); printf(";\n");
    printf("is_empty=%d\n", qseq_is_empty(R->lq));
#endif
    empty = qseq_is_empty(R->lq);
    return empty;
}

/* Determinant attached to R: recomputed from R->lq when the sequence is in
   use (and cross-checked against R->det when ASSERT is active), otherwise
   the cached R->det. */
int regular_determinant(regular_t R)
{
    int det;

    if(!qseq_is_used(R->lq))
	return R->det;

    det = qseq_determinant(R->lq);
    ASSERT(det == R->det);
    return det; /* TMP */
}

/* Q ?= product of lq[lq->first..lq->last[
   Rebuilds the product of the quotient matrices of lq from scratch and
   compares it with Q.  Returns 1 on agreement; on mismatch both matrices are
   printed and an assertion fails.  When QSEQ_FULL == 0 the check cannot be
   done: a warning is printed and 1 is returned.

   The reference matrix is initialised by qseq_build_product itself (each of
   its build paths starts with an init call), so it must not be initialised
   beforehand: a prior init would be overwritten and never cleared. */
int check_Q_from_lq(struct hgcd_matrix *Q, qseq_t lq)
{
    struct hgcd_matrix bfQ;
    int ok;

#if QSEQ_FULL == 0
    printf("W: sorry, cannot check Q from lq since lq is not full!\n");
    return 1;
#endif
#if DEBUG_REGULAR >= 1
    printf("lqcheck:="); qseq_print(lq); printf(";\n");
#endif
    assert(Q->n <= Q->alloc);
    /* bfQ <- prod lq[i], initialised by the callee */
    qseq_build_product(&bfQ, lq);
    ok = mat_is_equal(Q, &bfQ);
    if(ok == 0){
	printf("Q:="); hgcd_matrix_print(Q); printf(";\n");
	printf("bfQ:="); hgcd_matrix_print(&bfQ); printf(";\n");
	assert(0);
    }
    hgcd_matrix_clear(&bfQ);
#if DEBUG_REGULAR >= 1
    printf("----- check_Q_from_lq: ok\n");
#endif
    return ok;
}

/********** build product **********/

/** INPUT: lq, a quotient sequence; Q, an uninitialised matrix.
    SIDE-EFFECT: Q <- prod_i [[q_i, 1], [1, 0]] for the non-zero cells
    i in [lq->first, lq->last[; Q is initialised here (identity on 16 limbs)
    and the caller has to clear it.
    The scratch buffer starts with one limb per cell and is doubled past the
    requirement Q->n + size(q_i) + 1 whenever it gets too small.
    Aborts on allocation failure.
*/
void qseq_build_product_plain_mpn(struct hgcd_matrix *Q, qseq_t lq)
{
    mp_size_t tp_alloc = qseq_card(lq); /* FIXME: use/pass a bound like an? */
    mp_ptr tp;
    size_t init_len = 16;
    int i;

    /* never ask malloc for 0 bytes: it may legitimately return NULL */
    if(tp_alloc < 1)
	tp_alloc = 1;
    tp = (mp_ptr)malloc(tp_alloc * sizeof(mp_limb_t));
    if(tp == NULL){
	fprintf(stderr, "qseq_build_product_plain_mpn: out of memory\n");
	abort();
    }
    /* Q <- identity matrix */
    hgcd_matrix_init_set_identity(Q, init_len);
#if DEBUG_REGULAR >= 1
    printf("Q:="); hgcd_matrix_print(Q);
#endif
    /* loop */
    for(i = lq->first; i < lq->last; i++)
	if(qseq_is_cell_zero(lq, i) == 0){
	    mp_size_t qn = qseq_nl(lq, i);
	    mp_size_t need = Q->n + (qn == 0 ? 1 : qn) + 1;

	    if(need > tp_alloc){
		mp_ptr grown;
#if DEBUG_REGULAR >= 1
		printf("qseq_build_product_plain_mpn: %lu > %lu\n",
		       (unsigned long)need, (unsigned long)tp_alloc);
#endif
		/* FIXME: too harsh? */
		tp_alloc = need << 1;
		/* keep the old pointer until the new one is known good */
		grown = (mp_ptr)realloc(tp, tp_alloc * sizeof(mp_limb_t));
		if(grown == NULL){
		    free(tp);
		    fprintf(stderr,
			    "qseq_build_product_plain_mpn: out of memory\n");
		    abort();
		}
		tp = grown;
	    }
	    qseq_mul_qi(Q, lq, i, tp, tp_alloc);
# if DEBUG_REGULAR >= 2
	    printf("q_%d:=", i); qseq_print_cell(lq, i);
	    printf(";\nnewQ:="); hgcd_matrix_print(Q);
	    printf("evalm(newQ - Q &* array([[q, 1], [1, 0]]));\n");
	    printf("Q:=newQ;\n");
# endif
	}
    free(tp);
}

/** assuming R <> E.
    SIDE-EFFECT: Q <- prod_i [[q_i, 1], [1, 0]] for the non-zero cells
    i in [lq->first, lq->last[.  Q is initialised here: from the first cell
    when it is non-zero, as the identity (16 limbs) otherwise, in particular
    when lq is empty.  The caller has to clear Q.
    The scratch buffer is grown to Q->n + size(q_i) + 1 limbs on demand.
    Aborts on allocation failure.
*/
void qseq_build_product_plain(struct hgcd_matrix *Q, qseq_t lq)
{
    mp_ptr tp;
    mp_size_t tp_alloc;
    size_t init_len = 16;
    int i = lq->first;

    if(qseq_is_empty(lq) != 0 || qseq_is_cell_zero(lq, i) != 0)
	/* Q = E */
	hgcd_matrix_init_set_identity(Q, init_len);
    else
	qseq_init_matrix_set_q(Q, lq, i);
    if(qseq_is_empty(lq))
	return;
#if DEBUG_REGULAR >= 1
    printf("Q0:="); hgcd_matrix_print(Q);
#endif
    /* overshooting if lq->first > 0; never ask malloc for 0 bytes */
    tp_alloc = (lq->last > 0 ? lq->last : 1);
    tp = (mp_ptr)malloc(tp_alloc * sizeof(mp_limb_t));
    if(tp == NULL){
	fprintf(stderr, "qseq_build_product_plain: out of memory\n");
	abort();
    }
    for(++i; i < lq->last; i++)
	if(qseq_is_cell_zero(lq, i) == 0){
	    mp_size_t qn = qseq_nl(lq, i);
	    mp_size_t need = 1 + Q->n + (qn == 0 ? 1 : qn);

	    if(need > tp_alloc){
		/* keep the old pointer until the new one is known good */
		mp_ptr grown = (mp_ptr)realloc(tp, need * sizeof(mp_limb_t));

		if(grown == NULL){
		    free(tp);
		    fprintf(stderr,
			    "qseq_build_product_plain: out of memory\n");
		    abort();
		}
		tp = grown;
		tp_alloc = need;
	    }
	    qseq_mul_qi(Q, lq, i, tp, tp_alloc);
	}
    free(tp);
}

/* Debugging aid: print on one line the normalized limb count of each of the
   four entries of T (row by row), starting from T->n.
   mp_size_t is a signed type whose width depends on the ABI, hence the
   explicit cast to long for printf. */
MAYBE_UNUSED
static void print_length_T(struct hgcd_matrix *T)
{
    int i, j;

    for(i = 0; i < 2; i++){
	for(j = 0; j < 2; j++){
	    mp_size_t len = T->n;

	    MPN_NORMALIZE(T->p[i][j], len);
	    printf(" %ld", (long)len);
	}
    }
    printf("\n");
}

/* Fill the leaves of the product tree: the non-zero cells of lq are grouped
   by runs of kmax consecutive quotients, and each run is multiplied into one
   matrix of T (the first quotient of a run initialises the matrix, the
   following ones are multiplied in, with tp as scratch space).
   Returns the number of matrices of T that were initialised. */
MAYBE_UNUSED
static int tree_init(struct hgcd_matrix *T, qseq_t lq,
		     mp_ptr tp, mp_size_t tp_alloc, size_t kmax)
{
    struct hgcd_matrix *leaf = NULL; /* matrix of the current run */
    size_t in_run = 0;               /* quotients already in the current run */
    int used = 0;                    /* matrices of T initialised so far */
    int i;

    for(i = lq->first; i < lq->last; i++){
	if(qseq_is_cell_zero(lq, i) != 0)
	    continue;
	if(in_run == 0){
	    /* open a new leaf: T[used] <- [[q, 1], [1, 0]] */
	    leaf = T + used;
	    used++;
	    qseq_init_matrix_set_q(leaf, lq, i);
	}
	else{
	    /* current leaf *= lq[i]=[[qi, 1], [1, 0]] */
#if DEBUG_REGULAR >= 2
	    printf("q_%d:=", i); qseq_print_cell(lq, i); printf(";\n");
#endif
	    qseq_mul_qi(leaf, lq, i, tp, tp_alloc);
	}
#if DEBUG_REGULAR >= 2
	printf("T[%d][%d]:=", used, (int)in_run);
	hgcd_matrix_print(leaf);
	print_length_T(leaf);
#endif
	if(++in_run == kmax)
	    in_run = 0;
    }
    /* FIXME: we could have T[nq] = Id */
    return used;
}

/** 
    SIDE-EFFECT: M <- prod_q Q[q].

    M is initialised here and has to be cleared by the caller.
    The non-zero quotients of Q are first grouped kmax at a time into leaf
    matrices (tree_init); the leaves are then multiplied pairwise, in place
    and level by level, until a single matrix remains in T[0].
    When Q holds no non-zero quotient the product is empty and M is set to
    the identity (on 16 limbs, as the plain builders do).
    Aborts on allocation failure.
*/
void qseq_build_product_tree(struct hgcd_matrix *M, qseq_t Q, size_t kmax)
{
#if TIMINGS >= 1
    double tt = runtime();
#endif
    mp_size_t card = qseq_card(Q);
    /* at least one slot: malloc(0) may legitimately return NULL */
    mp_size_t tp_alloc = (card > 0 ? card : 1), itch;
    struct hgcd_matrix *T =
	(struct hgcd_matrix *)malloc(tp_alloc * sizeof(struct hgcd_matrix));
    /* M->a = O(max_q^#Q) */
    mp_ptr tp = (mp_ptr)malloc(tp_alloc * sizeof(mp_limb_t));
    int i, j, nt, ntlev;

    if(T == NULL || tp == NULL){
	free(T);
	free(tp);
	fprintf(stderr, "qseq_build_product_tree: out of memory\n");
	abort();
    }
    /* feed T by kmax products of small matrices */
    nt = tree_init(T, Q, tp, tp_alloc, kmax);
#if TIMINGS >= 1
    fprintf(stderr, "{%d} build_product.init : %d %lf\n",
	    ty_level, nt, runtime()-tt);
# if 0
    for(i = 0; i < nt; i++){
	printf("T%d: %lu", i, T[i].n);
	print_length_T(T+i);
    }
# endif
#endif    
    if(nt == 0){
	/* no leaf was built, T[0] is uninitialised: empty product */
	hgcd_matrix_init_set_identity(M, 16);
	free(T);
	free(tp);
	return;
    }
    /* multiply in place */
    for(ntlev = nt; ntlev > 1; ){
#if TIMINGS >= 1
	tt = runtime();
#endif
	for(i = 0, j = 0; i+1 < ntlev; i += 2, j += 1){
	    /* T[j=i/2] <- T[i]*T[i+1] */
	    itch = mpn_matrix22_mul_itch((mp_size_t)T[i].n, (mp_size_t)T[i+1].n);
	    if(itch > tp_alloc){
		/* keep the old pointer until the new one is known good */
		mp_ptr grown = (mp_ptr)realloc(tp, itch * sizeof(mp_limb_t));

		if(grown == NULL){
		    free(tp);
		    fprintf(stderr,
			    "qseq_build_product_tree: out of memory\n");
		    abort();
		}
		tp = grown;
		tp_alloc = itch;
		printf("reallocating in build_product: %lu\n",
		       (unsigned long)tp_alloc);
	    }
	    hgcd_matrix_realloc(T+i, T[i].n + T[i+1].n + 1);
	    mpn_hgcd_matrix_mul(T+i, T+i+1, tp);
	    if(j > 0){
		/* j = 0 means i = 0; j > 0 => i > j */
		/* FIXME: use pointers? */
		hgcd_matrix_set(T+j, T+i);
	    }
#if DEBUG_REGULAR >= 1
	    printf("T[%d] <- T[%d]*T[%d]=", j, i, i+1);
	    hgcd_matrix_print(T+j);
#endif
	}
	if((ntlev & 1) == 0)
	    ntlev = j;
	else{
	    /* left alone matrix */
	    /* FIXME: use pointers */
	    hgcd_matrix_set(T+j, T+(ntlev-1));
	    ntlev = j+1;
	}
#if TIMINGS >= 2
	fprintf(stderr, "build_product.level: %d %lf\n", ntlev, runtime()-tt);
#endif	
    }
    /* the whole product now sits in T[0] */
    hgcd_matrix_init_set(M, T);
#if DEBUG_REGULAR >= 1
    printf("MAG Mfinal:=K22!"); hgcd_matrix_print(M); printf(";\n");
#endif
    /* clean */
    for(i = 0; i < nt; i++)
	hgcd_matrix_clear(T+i);
    free(T);
    free(tp);
}

/* we assume nq > BUILD_PRODUCT_THRESHOLD2. */
/* Number of quotients grouped in one leaf of the product tree: nq itself up
   to 250 quotients, 300 beyond. */
static int blocking_factor(int nq)
{
    return (nq > 250) ? 300 : nq;
}

/** assuming Q <> E. 

    SIDE-EFFECT: M <- product of the quotient matrices of lq, computed by the
    strategy matching the number of quotients: plain product up to
    BUILD_PRODUCT_THRESHOLD1, plain mpn product up to
    BUILD_PRODUCT_THRESHOLD2, product tree beyond.

    TODO: qseq_build_product_plain_mpn should disappear.
*/
void qseq_build_product(struct hgcd_matrix *M, qseq_t lq)
{
    const int nq = qseq_card(lq);

#if DEBUG_REGULAR > 0
    printf("Entering qseq_build_product with card(lq)=%d\n", qseq_card(lq));
#endif
    if(nq <= BUILD_PRODUCT_THRESHOLD1){
	qseq_build_product_plain(M, lq);
    }
    else if(nq <= BUILD_PRODUCT_THRESHOLD2){
	qseq_build_product_plain_mpn(M, lq);
    }
    else{
#if DEBUG_REGULAR >= 1
	/* reference product, printed next to the tree result */
	struct hgcd_matrix MP;

	qseq_build_product_plain_mpn(&MP, lq);
#endif
	qseq_build_product_tree(M, lq, blocking_factor(nq));
#if DEBUG_REGULAR >= 1
	printf("MP00:="); MPN_PRINT(MP.p[0][0], MP.n); printf(";\n");
	printf(" M00:="); MPN_PRINT(M->p[0][0], M->n); printf(";\n");
	hgcd_matrix_clear(&MP);
#endif
    }
#if DEBUG_REGULAR >= 1
    printf("bpfQ_%d_%d:=", lq->first, lq->last); hgcd_matrix_print(M);
#endif
}

/* Remember where the quotient sequence of R currently starts and make it
   look empty: the old first/first_large go to *Rfirst/*Rfirst_large, then
   first <- last and first_large <- last_large.
   Nothing happens when R->lq is not in use; det is currently unused. */
void regular_save(int *Rfirst, int *Rfirst_large, int *det, regular_t R)
{
#if DEBUG_REGULAR >= 1
    printf("save: R->Q="); hgcd_matrix_print(R->Q); printf(";\n");
    printf("R->lq="); qseq_print(R->lq); printf(";\n");
#endif
    if(qseq_is_used(R->lq) == 0)
	return;

    /* small quotients */
    *Rfirst = R->lq->first;
    R->lq->first = R->lq->last;
    /* large quotients */
    *Rfirst_large = R->lq->first_large;
    R->lq->first_large = R->lq->last_large;
    // DET *det = R->det;
    // DET R->det = 1;
}

/* Counterpart of regular_save: put back the start indices of the quotient
   sequence.  Nothing happens when R->lq is not in use; det is currently
   unused. */
void regular_restore(regular_t R, int Rfirst, int Rfirst_large, int det)
{
    if(qseq_is_used(R->lq) == 0)
	return;

    R->lq->first = Rfirst;
    R->lq->first_large = Rfirst_large;
    // DET R->det *= det;
}

/* Multiply R->Q by lq[Rfirst..lq->last[.
   R->lq->first is moved to Rfirst for the duration of the gcd_mul_M call
   and put back afterwards. */
void regular_flush_lq(regular_t R, int Rfirst)
{
    const int saved_first = R->lq->first;

    R->lq->first = Rfirst;
    gcd_mul_M(R, Rfirst);
    R->lq->first = saved_first;
}
