/*! \file
    \brief  2x2 matrices
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "gcd_utils.h"

/* #include "gmp_fft.h" */
#include "gcd_matrix.h"
/* #include "waksman.h" */

#define DEBUG_MATRIX 0

/**********************************************************************
**********************************************************************/

void hgcd_matrix_init(struct hgcd_matrix *A, size_t len)
{
    int i, j;
    
    A->alloc = len;
    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++){
	    A->p[i][j] = (mp_ptr)malloc(len * sizeof(mp_limb_t));
	    /* overshooting? */
	    MPN_ZERO(A->p[i][j], len);
	}
    A->n = 1;
}

/* Initialise A as a copy of B: storage for B->n limbs per entry is
   allocated through hgcd_matrix_init, then the B->n limbs of each entry
   are copied over. */
MAYBE_UNUSED
void hgcd_matrix_init_set(struct hgcd_matrix *A, struct hgcd_matrix *B)
{
    int k;

    hgcd_matrix_init(A, B->n);
    /* k enumerates the entries in row-major order: 00, 01, 10, 11 */
    for(k = 0; k < 4; k++)
        MPN_COPY(A->p[k >> 1][k & 1], B->p[k >> 1][k & 1], B->n);
    A->n = B->n;
}

void hgcd_matrix_set_zero(struct hgcd_matrix *A)
{
    int i, j;

    /* FIXME: overshooting? or simplify set A->n = 1 + easy clearing? */
    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++)
	    MPN_ZERO(A->p[i][j], A->n);
    A->n = 1;
}

/* Overwrite an already initialised A with the identity matrix:
   zero it (which also sets A->n to 1), then put 1 on the diagonal. */
void hgcd_matrix_set_identity(struct hgcd_matrix *A)
{
    int d;

    hgcd_matrix_set_zero(A);
    for(d = 0; d < 2; d++)
        A->p[d][d][0] = 1;
}

/* Allocate A with len limbs per entry (see hgcd_matrix_init) and make it
   the identity matrix by writing 1 into the low limb of both diagonal
   entries. */
void hgcd_matrix_init_set_identity(struct hgcd_matrix *A, mp_size_t len)
{
    int d;

    hgcd_matrix_init(A, len);
    for(d = 0; d < 2; d++)
        A->p[d][d][0] = 1;
}

void hgcd_matrix_clear(struct hgcd_matrix *A)
{
    int i, j;

    A->n = 0;
    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++){
	    free(A->p[i][j]);
	    A->p[i][j] = NULL;
	}
}

/* M <- R for an already initialised M.  M is first grown to R's
   allocation when its own is smaller, then takes R's size and the R->n
   limbs of each entry. */
void hgcd_matrix_set(struct hgcd_matrix *M, struct hgcd_matrix *R)
{
    int row;

    if(M->alloc < R->alloc)
        hgcd_matrix_realloc(M, R->alloc);
    M->n = R->n;
    for(row = 0; row < 2; row++){
        MPN_COPY(M->p[row][0], R->p[row][0], R->n);
        MPN_COPY(M->p[row][1], R->p[row][1], R->n);
    }
}

/* Shallow copy: A takes B's size and B's four limb pointers; no limb is
   copied, so both matrices share the same storage afterwards.
   We suppose that the entries of A contain NULL on entry (whatever they
   pointed to is not freed here). */
MAYBE_UNUSED
static void hgcd_matrix_set_ptr(struct hgcd_matrix *A, struct hgcd_matrix *B)
{
    int row;

    A->n = B->n;
    for(row = 0; row < 2; row++){
        A->p[row][0] = B->p[row][0];
        A->p[row][1] = B->p[row][1];
    }
}

/* Print A on stdout as [[a, b], [c, d]], each entry being written by
   MPN_PRINT over A->n limbs.  No trailing newline is emitted. */
void hgcd_matrix_print(struct hgcd_matrix *A)
{
    int row;

    printf("[");
    for(row = 0; row < 2; row++){
        /* separator between the two rows */
        if(row > 0)
            printf(", ");
        printf("[");
        MPN_PRINT(A->p[row][0], A->n);
        printf(", ");
        MPN_PRINT(A->p[row][1], A->n);
        printf("]");
    }
    printf("]");
}

void hgcd_matrix_realloc(struct hgcd_matrix *M, size_t sz)
{
    int i, j;
    size_t old = M->alloc, len;

    if((mp_size_t)sz <= M->alloc)
	return;
    do {
	M->alloc <<= 1;
    } while((mp_size_t)sz > M->alloc);
    len = M->alloc;
#if DEBUG_MATRIX >= 1
    fprintf(stderr, "hgcd_matrix_realloc: %lu -> %lu\n", sz, M->alloc);
#endif
    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++){
	    M->p[i][j] = realloc(M->p[i][j], len * sizeof(mp_limb_t));
	    MPN_ZERO(M->p[i][j]+old, len-old); /* FIXME: really needed? */
	}
}

/* Hum, is this realistic? */
int hgcd_matrix_is_identity(struct hgcd_matrix *M)
{
    if(M->n != 1)
	return 0;
    return (M->p[0][0][0] == 1 && M->p[1][1][0] == 1
	    && M->p[0][1][0] == 0 && M->p[1][0][0] == 0);
}

int mat_is_equal(struct hgcd_matrix *M, struct hgcd_matrix *R)
{
    mp_size_t Mn, Rn;
    int ok = 1, i, j;

    for(i = 0; i < 2; i++)
	for(j = 0; j < 2; j++){
	    Mn = M->n;
	    Rn = R->n;
	    MPN_NORMALIZE(M->p[i][j], Mn);
	    MPN_NORMALIZE(R->p[i][j], Rn);
	    if(Mn != Rn || mpn_cmp(M->p[i][j], R->p[i][j], Rn) != 0){
		ok = 0;
		printf("M%d%d:=",i,j); MPN_PRINT(M->p[i][j],Mn); printf(";\n");
		printf("R%d%d:=",i,j); MPN_PRINT(R->p[i][j],Rn); printf(";\n");
	    }
	}
    return ok;
}

/* Right-multiplication of A by the elementary matrix [[q, 1], [1, 0]]:

   [a b]   [q 1]   [q*a+b a]
   [   ] * [   ] = [       ]
   [c d]   [1 0]   [q*c+d c]

   General-size version: q has qn limbs and tp is scratch space holding
   tp_alloc >= A->n + qn + 1 limbs (asserted).  A is grown when its
   allocation is smaller than A->n + qn + 1.
*/
static void hgcd_matrix_mul_q_large(struct hgcd_matrix *A,
                                    mp_ptr q, mp_size_t qn,
                                    mp_ptr tp, mp_size_t tp_alloc)
{
    const mp_size_t need = A->n + qn + 1;
    int row, col;

    assert(need <= tp_alloc);
    if(A->alloc < need){
#if PRINT_WARNINGS
        printf("hgcd_matrix_mul_q: %lu > %lu\n", need, A->alloc);
#endif
        hgcd_matrix_realloc(A, need);
    }
    /* just to be sure: clear the qn+1 limbs above the part in use */
    for(row = 0; row < 2; row++){
        MPN_ZERO(A->p[row][0]+A->n, qn+1);
        MPN_ZERO(A->p[row][1]+A->n, qn+1);
    }
#if DEBUG_MATRIX >= 1
    printf("before:="); hgcd_matrix_print(A);
    printf("\nA->n=%lu, qn=%lu\n", A->n, qn);
    for(row = 0; row < 2; row++)
        for(col = 0; col < 2; col++){
            int k;

            printf("%d%d:", row, col);
            for(k = 0; k < need; k++)
                printf(" %lu", A->p[row][col][k]);
            printf("\n");
        }
#endif
    /* A[., 1] += q*A[., 0]
       this is a GMP routine: [[a, b], [c, d]] -> [[a, b+a*q], [c, d+c*q]] */
    mpn_hgcd_matrix_update_q(A, q, qn, 1, tp);
    /* A->n was updated */
#if DEBUG_MATRIX >= 1
    printf("after_update:="); hgcd_matrix_print(A); printf(";\n");
#endif
    /* now we swap the two columns, row by row */
    MP_PTR_SWAP(A->p[0][0], A->p[0][1]);
    MP_PTR_SWAP(A->p[1][0], A->p[1][1]);
#if DEBUG_MATRIX >= 1
    printf("after_swap:="); hgcd_matrix_print(A); printf(";\n");
#endif
}

/** Right-multiply A by [[q, 1], [1, 0]] in place.
    INPUT: q > 0 stored on qn limbs; A = [[a, b], [c, d]]
    PRECONDITION: tp_alloc >= A->n + qn + 1
    SIDE-EFFECT: A becomes [[q*a+b, a], [q*c+d, c]].  In the one-limb
    case A->n grows by one limb when the update produces a carry.
    tp and tp_alloc are only used when qn != 1.
    Rem. special code for the 60% case with q = 1.
 */
void hgcd_matrix_mul_q(struct hgcd_matrix *A, mp_ptr q, mp_size_t qn,
                       mp_ptr tp, mp_size_t tp_alloc)
{
    mp_limb_t carry[2];
    int row;

    if(qn != 1){
        /* rare case of qn > 1 */
#if PRINT_WARNINGS >= 1
        printf("WARNING hgcd_matrix_mul_q: rare case, large[%lu] q:=", qn);
        MPN_PRINT(q, qn); printf(";\n");
#endif
        hgcd_matrix_mul_q_large(A, q, qn, tp, tp_alloc);
    }
    else{
        const int unit_q = (q[0] == ((mp_limb_t)1));

        /* Step 1: second column += q * first column, carry-out per row */
        if(unit_q && A->n == 1){
            /* very frequent: A <- [[a+b, a], [c+d, c]] on a single limb */
            for(row = 0; row < 2; row++){
                add_ssaaaa(carry[row], A->p[row][1][0],
                           0, A->p[row][0][0], 0, A->p[row][1][0]);
            }
        }
        else if(unit_q){
            /* q = 1 on several limbs: A->p[row][1] += A->p[row][0] */
            for(row = 0; row < 2; row++){
                carry[row] = mpn_add_n(A->p[row][1], A->p[row][0],
                                       A->p[row][1], A->n);
            }
        }
        else{
            /* still the vast majority.  (A umul_ppmm/add_ssaaaa variant
               for A->n == 1 was tried and judged not convincing.) */
            for(row = 0; row < 2; row++){
                carry[row] = mpn_addmul_1(A->p[row][1], A->p[row][0],
                                          A->n, q[0]);
            }
        }
        /* Step 2: a non-zero carry needs one more limb in every entry */
        if((carry[0] | carry[1]) != 0){
            const mp_size_t top = A->n; /* index of the new high limb */

            A->n = top + 1;
            if(A->n > A->alloc)
                hgcd_matrix_realloc(A, A->n);
            for(row = 0; row < 2; row++){
                /* report carry */
                A->p[row][1][top] = carry[row];
                /* clear extra limb to have a clean swap */
                A->p[row][0][top] = 0;
            }
        }
        /* Step 3: swap the columns so that the updated one comes first.
           TODO: remove swaps by swapping the arguments above */
        for(row = 0; row < 2; row++){
            mp_ptr held = A->p[row][0];

            A->p[row][0] = A->p[row][1];
            A->p[row][1] = held;
        }
    }
#if DEBUG_REGULAR >= 1
    printf("MAG Aq:=K22!"); hgcd_matrix_print(A);
    printf(";\nMAG IsZero(MyMul(Aorig, Matrix([[q, 1], [1, 0]]))-Aq);\n");
#endif
    //DET    R->det = - R->det;
}

/* HERE: fix this mul_q stuff */

#if 0
/**
Disabled code, kept for reference.

Intended operation:

[a b]   [0  1]   [b  a-q*b]
[   ] * [    ] = [        ]
[c d]   [1 -q]   [d  c-q*d]

Rem: the body only performs the column update A[., 1] += q*A[., 0]
(mpn_hgcd_matrix_update_q with column index 1), i.e. it computes in fact:

[a q*a+b]
[       ]     <== check
[c q*c+d]

NOTE(review): the original note gave the second row as [b q*a+b]; the
row above is what the update A[., 1] += q*A[., 0] yields -- confirm.

swapping will be done later.

PRECONDITION: tp_alloc >= A->n + qn + 1 (asserted); A is grown when its
allocation is smaller than that.
NOTE(review): the warning printed below is labelled "hgcd_matrix_mul_q".

 */
void hgcd_matrix_div_q(struct hgcd_matrix *A, mp_ptr q, mp_size_t qn,
		       mp_ptr tp, mp_size_t tp_alloc)
{
    mp_size_t tmpn = A->n + qn + 1;

    assert(tmpn <= tp_alloc);
    if(tmpn > A->alloc){ /* should not happen! */
#if PRINT_WARNINGS
	printf("hgcd_matrix_mul_q: %lu > %lu\n", tmpn, A->alloc);
#endif
	hgcd_matrix_realloc(A, tmpn);
    }
    /* A[., 1] += q*A[., 0] */
#if DEBUG_MATRIX >= 1
    printf("before:="); hgcd_matrix_print(A);
#endif
    mpn_hgcd_matrix_update_q(A, q, qn, 1, tp);
#if DEBUG_MATRIX >= 1
    printf("after:="); hgcd_matrix_print(A);
#endif
}
#endif

/* R *= M, see gmp/mpn/matrix22_mul.c */
void hgcd_matrix_mul(struct hgcd_matrix *R, struct hgcd_matrix *M)
{
    mp_ptr tp;
    mp_size_t tpn = mpn_matrix22_mul_itch((mp_size_t)R->n, (mp_size_t)M->n);
    mp_size_t tmpn = R->n + M->n + 1;
    int i, j;

#if DEBUG_MATRIX >= 1
    printf("hmm_R: "); hgcd_matrix_print(R);
    printf("hmm_M: "); hgcd_matrix_print(M);
#endif
    tp = (mp_ptr)malloc(tpn * sizeof(mp_limb_t));
    if(tmpn > R->alloc){
#if DEBUG_MATRIX >= 2
	printf("W: tmpn=%lu > R->alloc=%lu\n", tmpn, R->alloc);
#endif
	for(i = 0; i < 2; i++)
	    for(j = 0; j < 2; j++)
		R->p[i][j] = realloc(R->p[i][j], tmpn * sizeof(mp_limb_t));
	R->alloc = tmpn;
    }
    mpn_hgcd_matrix_mul(R, M, tp);
#if DEBUG_MATRIX >= 1
    printf("hmm_RM: "); hgcd_matrix_print(R);
#endif
    free(tp);
}

/* abs(M^(-1)) = abs([[p, q], [r, s]]^(-1)) = abs([[s, -q], [-r, p]]).
   invM is initialised here (storage for M->n limbs per entry) and
   receives M with its two diagonal entries exchanged; the off-diagonal
   entries are copied unchanged. */
MAYBE_UNUSED
void hgcd_matrix_abs_inverse(struct hgcd_matrix *invM, struct hgcd_matrix *M)
{
    int row, col;

    hgcd_matrix_init(invM, M->n);
    for(row = 0; row < 2; row++)
        for(col = 0; col < 2; col++){
            /* diagonal entries trade places, the others stay put */
            const int src_row = (row == col) ? 1 - row : row;
            const int src_col = (row == col) ? 1 - col : col;

            MPN_COPY(invM->p[row][col], M->p[src_row][src_col], M->n);
        }
    invM->n = M->n;
}

#if DEBUG_MATRIX >= 1
/* Debug cross-check: recompute a0 * v[0] + a1 * v[1] with mpz integers
   and assert that it equals the wn-limb number stored at w. */
static void check_mul_vec(mp_ptr w, mp_size_t wn,
                          mp_ptr a0, mp_size_t a0n,
                          mp_ptr a1, mp_size_t a1n,
                          mp_ptr *v, mp_size_t *vn)
{
    mpz_t lhs0, lhs1, rhs0, rhs1, expect;

    mpz_init(expect);
    mpz_init_set_ui(rhs0, 0);
    mpz_init_set_ui(rhs1, 0);
    mpz_init_set_ui(lhs0, 0);
    mpz_init_set_ui(lhs1, 0);
    /* load the two coefficients and the two vector components */
    MPZ_SET_MPN(lhs0, a0, a0n);
    MPZ_SET_MPN(rhs0, v[0], vn[0]);
    MPZ_SET_MPN(lhs1, a1, a1n);
    MPZ_SET_MPN(rhs1, v[1], vn[1]);
    /* expect <- a0 * v[0] + a1 * v[1] */
    mpz_mul(expect, lhs0, rhs0);
    mpz_mul(rhs1, rhs1, lhs1);
    mpz_add(expect, expect, rhs1);
    /* rhs1 is recycled to hold the value under test */
    MPZ_SET_MPN(rhs1, w, wn);
#if DEBUG_MATRIX >= 2
    gmp_printf("w :=%Zd;\n", rhs1);
    gmp_printf("zw:=%Zd;\n", expect);
#endif
    assert(mpz_cmp(expect, rhs1) == 0);
    mpz_clears(rhs0, rhs1, expect, lhs0, lhs1, NULL);
}
#endif

/**********************************************************************
multiplications and the like
**********************************************************************/

#ifdef USE_GMP
#else /***** USE_GMP *****/
/* w[i] <- M[i, 0]*v[0]+M[i, 1]*v[1] for i = 0, 1.
   M is given flat: M[2*i+k] is entry (i, k) with size Mn[2*i+k].
   wn[i] receives the size of w[i].
   Only the "sharing" variant below is compiled; the two other variants
   are kept for reference.  In the compiled variant an allocation failure
   for the temporaries is fatal: a diagnostic is printed on stderr and
   the program aborts, instead of handing NULL to mpn_mul_fft_2. */
static void mul_vec_mpn_fft(mp_ptr *w, mp_size_t *wn,
                            mp_ptr *M, mp_size_t *Mn,
                            mp_ptr *v, mp_size_t *vn)
{
    mp_ptr tmp[2];
    mp_size_t tmpn[2];
    int i;

#if 0
    /* plain version, no sharing */
    for(i = 0; i < 2; i++){
        printf("using mpn_mul_fft_main\n");

        mpn_mul_fft_main(w[i], M[2*i], Mn[2*i], v[0], vn[0]);
        wn[i] = Mn[2*i] + vn[0];
        MPN_NORMALIZE(w[i], wn[i]);

        tmpn = Mn[2*i+1] + vn[1];
        tmp = realloc(tmp, tmpn * sizeof(mp_limb_t));
        mpn_mul_fft_main(tmp, M[2*i+1], Mn[2*i+1], v[1], vn[1]);
        MPN_NORMALIZE(tmp, tmpn);

        mpn_incr(w[i], wn+i, tmp, tmpn);
    }
#elif 1 /* sharing */
    /* w[i] = M[i, 0]*v[0]+M[i, 1]*v[1] */

    /* 1st half: presumably w[0] = v[0]*M[0] and w[1] = v[0]*M[2], with
       the work on v[0] shared between both products -- TODO confirm
       against mpn_mul_fft_2 */
    mpn_mul_fft_2(w[0], wn, w[1], wn+1, v[0], vn[0], M[0], Mn[0], M[2], Mn[2]);

    /* 2nd half: same with v[1] and the second column, into temporaries */
    for(i = 0; i < 2; i++){
        tmpn[i] = Mn[2*i+1] + vn[1];
        tmp[i] = (mp_ptr)malloc(tmpn[i] * sizeof(mp_limb_t));
        /* a NULL result is only an error for a non-empty request */
        if(tmp[i] == NULL && tmpn[i] != 0){
            fprintf(stderr, "mul_vec_mpn_fft: cannot allocate %lu limbs\n",
                    (unsigned long)tmpn[i]);
            abort();
        }
    }
    mpn_mul_fft_2(tmp[0], tmpn, tmp[1], tmpn+1,
                  v[1], vn[1], M[1], Mn[1], M[3], Mn[3]);

    /* add */
    for(i = 0; i < 2; i++)
        mpn_incr(w[i], wn+i, tmp[i], tmpn[i]);
    for(i = 0; i < 2; i++)
        free(tmp[i]);
#else
    mpn_mat_vec_fft(w, wn, M, Mn, v, vn);
    for(i = 0; i < 2; i++){
        wn[i] = max(vn[0]+Mn[2*i], vn[1]+Mn[2*i+1]) + 1;
        MPN_NORMALIZE(w[i], wn[i]);
    }
#endif
}

/* INPUT: M is 2x2; N is 2xg; W is 2xg (all stored flat, row-major)
   SIDE-EFFECT: W <- M * N.
   for i = 0, 1; for j = 0, ..., g-1:
       w[i, j] = sum(M[i, k] * N[k, j], 0 <= k < 2);
   Wn[g*i+j] receives the size of W[g*i+j] as reported by
   mul_vec_mpn_fft.
 */
static void mpn_mat_mul_mat_fft(mp_ptr *W, mp_size_t *Wn,
                                mp_ptr *M, mp_size_t *Mn,
                                mp_ptr *N, mp_size_t *Nn)
{
    mp_ptr v[2], w[2];
    mp_size_t vn[2], wn[2];
    int i, j, g = 2; /* TODO: make this work for g > 2 */

    /* proceed column by column */
    for(j = 0; j < g; j++){
        /* gather column j of N as the input vector and column j of W
           as the destination */
        for(i = 0; i < 2; i++){
            /* w[i, j] = M[i, 0]*N[0, j]+M[i, 1]*N[1, j] */
            v[i]  =  N[g*i+j];
            vn[i] = Nn[g*i+j];
            w[i]  =  W[g*i+j];
        }
        mul_vec_mpn_fft(w, wn, M, Mn, v, vn);
        /* store the sizes of the results (not the pointers) */
        for(i = 0; i < 2; i++)
            Wn[g*i+j] = wn[i];
    }
}
#endif /* ifndef USE_GMP */

/* SIDE-EFFECT: w <- a0 * v[0] + a1 * v[1], *wn <- size of w.
   A size of 0 stands for the value zero: a product with a zero-sized
   operand is skipped.  The first product is written directly into w and
   normalized; the second one is accumulated through gcd_addmul.
*/
static void mul_z_z(mp_ptr w, mp_size_t *wn,
                    mp_ptr a0, mp_size_t a0n,
                    mp_ptr a1, mp_size_t a1n,
                    mp_ptr *v, mp_size_t *vn)
{
    const mp_ptr v0 = v[0];
    const mp_size_t v0n = vn[0];

#if DEBUG_MATRIX >= 1
    printf("[%lu]a0:=", a0n); MPN_PRINT(a0, a0n);
    printf(";\n[%lu]a1:=", a1n); MPN_PRINT(a1, a1n);
    printf(";\n[%lu]v0:=", vn[0]); MPN_PRINT(v[0], vn[0]);
    printf(";\n[%lu]v1:=", vn[1]); MPN_PRINT(v[1], vn[1]);
    printf(";\n");
#endif
    /* w <- a0 * v[0] */
    if(a0n != 0 && v0n != 0){
        /* mpn_mul takes the longer operand first */
        if(a0n < v0n)
            mpn_mul(w, v0, v0n, a0, a0n);
        else
            mpn_mul(w, a0, a0n, v0, v0n);
        *wn = a0n + v0n;
        MPN_NORMALIZE(w, *wn);
    }
    else
        *wn = 0;
#if DEBUG_MATRIX >= 1
    printf("[%lu]a0xv0:=", *wn); MPN_PRINT(w, *wn); printf(";\n");
#endif
    /* w += a1 * v[1], nothing to do when either factor is zero */
    if(!(a1n == 0 || vn[1] == 0))
        gcd_addmul(w, wn, a1, a1n, v[1], vn[1]);
#if DEBUG_MATRIX >= 1
    printf("[%lu]a0xv0+a1xv1:=", *wn); MPN_PRINT(w, *wn); printf(";\n");
#endif
}

/* (w[0]; w[1]) <- M * (v[0]; v[1])
   M is given flat: row i is (M[2*i], M[2*i+1]) with sizes in Mn.
   wn[i] receives the size of w[i].  Each row is one mul_z_z call. */
MAYBE_UNUSED
void mat2x2_mul_vec_mpn_plain(mp_ptr *w, mp_size_t *wn,
                              mp_ptr *M, mp_size_t *Mn,
                              mp_ptr *v, mp_size_t *vn)
{
    int row;

    for(row = 0; row < 2; row++){
        mp_ptr *Mrow = M + 2*row;
        mp_size_t *Mrown = Mn + 2*row;

        mul_z_z(w[row], wn + row, Mrow[0], Mrown[0], Mrow[1], Mrown[1], v, vn);
    }
}

/** INPUT: A[i] has size nA+nv+1, v[i] has even size nv;
           w[i] must have size >= (nA+nv+1 + nv2), where nv2 = nv/2;
	   wn[i] contains the size of w[i], to be updated.
    Each V[i] is split into a low and a high half of nv2 limbs; A is
    multiplied IN PLACE by the 2x2 matrix made of these halves and the
    two columns of each result row are glued back together into w[i].
    SIDE-EFFECT: A is overwritten by the product.
    The body is compiled only when USE_STRASSEN or USE_GMP is defined;
    otherwise the function does nothing.
    A scratch allocation failure is fatal (diagnostic on stderr, abort).
 */
void mat2x2_mul_vec_mpn222(mp_ptr *w, mp_size_t *wn,
			   mp_ptr *A, mp_size_t nA,
			   mp_ptr *V, mp_size_t nv)
{
#if defined(USE_STRASSEN) || defined(USE_GMP)
    size_t tp_alloc = mpn_matrix22_mul_itch(nA, nv);
    mp_ptr tp = (mp_ptr)malloc(tp_alloc * sizeof(mp_limb_t));
    mp_size_t nv2 = nv >> 1;
    int i;

    /* a NULL result is only an error for a non-empty request */
    if(tp == NULL && tp_alloc != 0){
	fprintf(stderr, "mat2x2_mul_vec_mpn222: cannot allocate %lu limbs\n",
		(unsigned long)tp_alloc);
	abort();
    }
    /* A *= [[v00, v01], [v10, v11]] */
#if DEBUG_MATRIX >= 1
    for(i = 0; i < 4; i++){
	printf("A%d:=", i); MPN_PRINT(A[i], nA); printf(";\n");
    }
    printf("A:=Matrix([[A0, A1], [A2, A3]]);\n");
    for(i = 0; i < 2; i++){
	printf("V%d:=", i); MPN_PRINT(V[i], nv); printf(";\n");
	printf("v%d:=", 2*i); MPN_PRINT(V[i], nv2); printf(";\n");
	printf("v%d:=", 2*i+1); MPN_PRINT(V[i]+nv2, nv2); printf(";\n");
	printf("V%d-(v%d+B^%d*v%d);\n", i, 2*i, (int)nv2, 2*i+1);
    }
    printf("vv:=Matrix([[v0, v1], [v2, v3]]);\n");
#endif
    mpn_matrix22_mul(A[0], A[1], A[2], A[3], nA,
		     V[0], V[0]+nv2, V[1], V[1]+nv2, nv2, tp);
#if DEBUG_MATRIX >= 1
    for(i = 0; i < 4; i++){
	printf("AV%d:=", i); MPN_PRINT(A[i], nA+nv+1); printf(";\n");
    }
    printf("AV:=Matrix([[AV0, AV1], [AV2, AV3]]);\n");
    printf("evalm(A &* vv - AV);\n");
#endif
    /* now we glue: w[0] = AV[0] + B^nv2 * AV[1]
                    w[1] = AV[2] + B^nv2 * AV[3]
    */
    for(i = 0; i < 2; i++){
	mp_size_t wni0 = nA + nv + 1, wni1 = wni0;
	mp_limb_t cy;

	MPN_NORMALIZE(A[2*i], wni0);
	MPN_COPY(w[i], A[2*i], wni0);
	MPN_NORMALIZE(A[2*i+1], wni1);
	/* w[i] = [0..wni0[ + [nv2..nv2+wni1[ = [0..nv2+wni1+1[ */
	//	tmpn = nv2 + wni1 + 1;
	//	assert(tmpn <= wn[i]);
	MPN_ZERO(w[i]+wni0, wn[i]-wni0);
	assert(nv2+wni1 <= wn[i]);
	/* The addition must run outside assert(): with NDEBUG the
	   argument of assert is not evaluated at all. */
	cy = mpn_add(w[i]+nv2, w[i]+nv2, wni1, A[2*i+1], wni1);
	assert(cy == 0);
	(void)cy; /* only read by the assert above */
	MPN_NORMALIZE(w[i], wn[i]);
#if DEBUG_MATRIX >= 1
	printf("w%d:=", i); MPN_PRINT(w[i], wn[i]); printf(";\n");
	printf("w%d-(AV%d+B^%d*AV%d);\n", i, 2*i, (int)nv2, 2*i+1);
#endif
    }
    free(tp);
#endif
}

/**
   INPUT: M is a 2x2 matrix (flat, row-major) with lengths in Mn.
          v is a 2x1 vector with lengths in vn.
   SIDE-EFFECT: (w[0]; w[1]) <- M * (v[0]; v[1]), sizes stored in wn.

   The idea would be to have ||M|| close to mm to multiply 2x2 x 2x2 = 2x2
   using Strassen if need be.

   If ||M|| is far away from mm, we could use 2x2 x 2xk for a suitably
   chosen k and use some fast algorithm for this.

   We cut v0 and v1 into g pieces, where ||v0||/||M|| = g.

   As compiled today the cutting strategy is switched off: the plain
   product mat2x2_mul_vec_mpn_plain is always used and g stays at 1.
*/
MAYBE_UNUSED
static void mat2x2_mul_vec_mpn(mp_ptr *w, mp_size_t *wn,
			       mp_ptr *M, mp_size_t *Mn,
			       mp_ptr *v, mp_size_t *vn)
{
#if TIMINGS > 0
    double tt = runtime();
#endif
    int ok;
    /* Number of pieces v is cut into; 1 means "no cutting".  Declared
       unconditionally because the TIMINGS report below prints it even
       when the cutting code is compiled out. */
    int g = 1;

#if 1
    ok = 0; /* one day!!! */
#else
    double ratio;
    mp_size_t normM = Mn[0];
    for(g = 1; g < 4; g++)
	normM = max(normM, Mn[g]);
    ratio = ((double)max(vn[0], vn[1]))/((double)normM);
    /* FIXME: do we really care to deal with the case ratio < 1? */
    if(normM <= 1 || ratio < 1.5){
	ok = 0;
	g = 1;
    }
    else{
	/* FIXME: it is the best we can hope for? */
	g = (int)(ratio + 0.5);
#if DEBUG_MATRIX >= 1
	printf("||M|| = %lu vn[0] = %lu vn[1] = %lu => %2.2lf => g=%d\n",
	       normM, vn[0], vn[1], ratio, g);
#endif
	/*	g = 2;*/
	ok = mat2x2_mul_vec_cut_mpn(w, wn, M, Mn, v, vn, g);
    }
#endif
    if(ok == 0){
#if 1
	mat2x2_mul_vec_mpn_plain(w, wn, M, Mn, v, vn);
#else
	mul_vec_mpn_fft(w, wn, M, Mn, v, vn);
#endif
    }
#if DEBUG_MATRIX >= 1
    /* a z version to check the answer */
    check_mul_vec(w[0], wn[0], M[0], Mn[0], M[1], Mn[1], v, vn);
    check_mul_vec(w[1], wn[1], M[2], Mn[2], M[3], Mn[3], v, vn);
#endif
#if TIMINGS > 0
    fprintf(stderr, "{%d} mat2x2_mul_vec_mpn: %d for [%lu, %lu, %lu, %lu] "
	    "* [%lu; %lu] %lf\n",
	    ty_level, g, Mn[0], Mn[1], Mn[2], Mn[3], vn[0], vn[1],
	    runtime()-tt);
#endif
    (void)g; /* unused when neither TIMINGS nor the cutting code is on */
}

