/*$Id: maij.c,v 1.4 2000/06/01 20:10:27 bsmith Exp bsmith $*/
/*
    Defines the basic matrix operations for the MAIJ  matrix storage format.
  This format is used for restriction and interpolation operations for 
  multicomponent problems. It interpolates each component the same way
  independently.

     We provide:
         MatMult()
         MatMultTranspose()
         MatMultTransposeAdd()
         MatMultAdd()
          and
         MatCreateMAIJ(Mat,dof,Mat*)

     This single directory handles both the sequential and parallel codes
*/

#include "src/mat/impls/aij/mpi/mpiaij.h"

typedef struct {
  int        dof;         /* number of components */
  Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
  VecScatter ctx;         /* update ghost points for parallel case */
  Vec        w;           /* work space for ghost values for parallel case */
} Mat_MAIJ;

#undef __FUNC__  
#define __FUNC__ /*<a name="MatDestroy_MAIJ"></a>*/"MatDestroy_MAIJ" 
int MatDestroy_MAIJ(Mat A)
{
  int      ierr;
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;

  PetscFunctionBegin;
  if (b->AIJ) {
    ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
  }
  if (b->OAIJ) {
    ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
  }
  if (b->ctx) {
    ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
  }
  if (b->w) {
    ierr = VecDestroy(b->w);CHKERRQ(ierr);
  }
  ierr = PetscFree(b);CHKERRQ(ierr);
  PetscHeaderDestroy(A);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ" 
int MatCreate_MAIJ(Mat A)
{
  int      ierr;
  Mat_MAIJ *b;

  PetscFunctionBegin;
  A->data             = (void*)(b = PetscNew(Mat_MAIJ));CHKPTRQ(b);
  ierr = PetscMemzero(b,sizeof(Mat_MAIJ));CHKERRQ(ierr);
  ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->factor           = 0;
  A->mapping          = 0;

  b->AIJ  = 0;
  b->dof  = 0;  
  b->OAIJ = 0;
  b->ctx  = 0;
  b->w    = 0;  
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* ----------------------------------------------------------------------------------*/
EXTERN int MatMult_SeqAIJ(Mat,Vec,Vec);
EXTERN int MatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec);
EXTERN int MatMultTranspose_SeqAIJ(Mat,Vec,Vec);
EXTERN int MatMultTransposeAdd_SeqAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_SeqMAIJ_1"></a>*/"MatMult_SeqMAIJ_1"
int MatMult_SeqMAIJ_1(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMult_SeqAIJ(b->AIJ,xx,yy);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_1"></a>*/"MatMultTranspose_SeqMAIJ_1"
int MatMultTranspose_SeqMAIJ_1(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultTranspose_SeqAIJ(b->AIJ,xx,yy);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_1"></a>*/"MatMultAdd_SeqMAIJ_1"
int MatMultAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultAdd_SeqAIJ(b->AIJ,xx,yy,zz);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_1"></a>*/"MatMultTransposeAdd_SeqMAIJ_1"
int MatMultTransposeAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultTransposeAdd_SeqAIJ(b->AIJ,xx,yy,zz);
  PetscFunctionReturn(0);
}

/* --------------------------------------------------------------------------------------*/
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2"
int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[2*idx[jrow]];
      sum2 += v[jrow]*x[2*idx[jrow]+1];
      jrow++;
     }
    y[2*i]   = sum1;
    y[2*i+1] = sum2;
  }

  PLogFlops(4*a->nz - 2*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2"
int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,zero = 0.0;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[2*i];
    alpha2 = x[2*i+1];
    while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
  }
  PLogFlops(4*a->nz - 2*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2"
int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[2*idx[jrow]];
      sum2 += v[jrow]*x[2*idx[jrow]+1];
      jrow++;
     }
    y[2*i]   += sum1;
    y[2*i+1] += sum2;
  }

  PLogFlops(4*a->nz - 2*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2"
int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx   = a->j + a->i[i] + shift;
    v     = a->a + a->i[i] + shift;
    n     = a->i[i+1] - a->i[i];
    alpha1 = x[2*i];
    alpha2 = x[2*i+1];
    while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
  }
  PLogFlops(4*a->nz - 2*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* --------------------------------------------------------------------------------------*/
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3"
int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[3*idx[jrow]];
      sum2 += v[jrow]*x[3*idx[jrow]+1];
      sum3 += v[jrow]*x[3*idx[jrow]+2];
      jrow++;
     }
    y[3*i]   = sum1;
    y[3*i+1] = sum2;
    y[3*i+2] = sum3;
  }

  PLogFlops(6*a->nz - 3*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3"
int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[3*i];
    alpha2 = x[3*i+1];
    alpha3 = x[3*i+2];
    while (n-->0) {
      y[3*(*idx)]   += alpha1*(*v);
      y[3*(*idx)+1] += alpha2*(*v);
      y[3*(*idx)+2] += alpha3*(*v);
      idx++; v++;
    }
  }
  PLogFlops(6*a->nz - 3*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3"
int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[3*idx[jrow]];
      sum2 += v[jrow]*x[3*idx[jrow]+1];
      sum3 += v[jrow]*x[3*idx[jrow]+2];
      jrow++;
     }
    y[3*i]   += sum1;
    y[3*i+1] += sum2;
    y[3*i+2] += sum3;
  }

  PLogFlops(6*a->nz);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3"
int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[3*i];
    alpha2 = x[3*i+1];
    alpha3 = x[3*i+2];
    while (n-->0) {
      y[3*(*idx)]   += alpha1*(*v);
      y[3*(*idx)+1] += alpha2*(*v);
      y[3*(*idx)+2] += alpha3*(*v);
      idx++; v++;
    }
  }
  PLogFlops(6*a->nz);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ------------------------------------------------------------------------------*/
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4"
int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    sum4  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[4*idx[jrow]];
      sum2 += v[jrow]*x[4*idx[jrow]+1];
      sum3 += v[jrow]*x[4*idx[jrow]+2];
      sum4 += v[jrow]*x[4*idx[jrow]+3];
      jrow++;
     }
    y[4*i]   = sum1;
    y[4*i+1] = sum2;
    y[4*i+2] = sum3;
    y[4*i+3] = sum4;
  }

  PLogFlops(8*a->nz - 4*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4"
int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[4*i];
    alpha2 = x[4*i+1];
    alpha3 = x[4*i+2];
    alpha4 = x[4*i+3];
    while (n-->0) {
      y[4*(*idx)]   += alpha1*(*v);
      y[4*(*idx)+1] += alpha2*(*v);
      y[4*(*idx)+2] += alpha3*(*v);
      y[4*(*idx)+3] += alpha4*(*v);
      idx++; v++;
    }
  }
  PLogFlops(8*a->nz - 4*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4"
int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    sum4  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[4*idx[jrow]];
      sum2 += v[jrow]*x[4*idx[jrow]+1];
      sum3 += v[jrow]*x[4*idx[jrow]+2];
      sum4 += v[jrow]*x[4*idx[jrow]+3];
      jrow++;
     }
    y[4*i]   += sum1;
    y[4*i+1] += sum2;
    y[4*i+2] += sum3;
    y[4*i+3] += sum4;
  }

  PLogFlops(8*a->nz - 4*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4"
int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[4*i];
    alpha2 = x[4*i+1];
    alpha3 = x[4*i+2];
    alpha4 = x[4*i+3];
    while (n-->0) {
      y[4*(*idx)]   += alpha1*(*v);
      y[4*(*idx)+1] += alpha2*(*v);
      y[4*(*idx)+2] += alpha3*(*v);
      y[4*(*idx)+3] += alpha4*(*v);
      idx++; v++;
    }
  }
  PLogFlops(8*a->nz - 4*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);

}

/* ------------------------------------------------------------------------------*/
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5"
int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    sum4  = 0.0;
    sum5  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[5*idx[jrow]];
      sum2 += v[jrow]*x[5*idx[jrow]+1];
      sum3 += v[jrow]*x[5*idx[jrow]+2];
      sum4 += v[jrow]*x[5*idx[jrow]+3];
      sum5 += v[jrow]*x[5*idx[jrow]+4];
      jrow++;
     }
    y[5*i]   = sum1;
    y[5*i+1] = sum2;
    y[5*i+2] = sum3;
    y[5*i+3] = sum4;
    y[5*i+4] = sum5;
  }

  PLogFlops(10*a->nz - 5*m);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5"
int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[5*i];
    alpha2 = x[5*i+1];
    alpha3 = x[5*i+2];
    alpha4 = x[5*i+3];
    alpha5 = x[5*i+4];
    while (n-->0) {
      y[5*(*idx)]   += alpha1*(*v);
      y[5*(*idx)+1] += alpha2*(*v);
      y[5*(*idx)+2] += alpha3*(*v);
      y[5*(*idx)+3] += alpha4*(*v);
      y[5*(*idx)+4] += alpha5*(*v);
      idx++; v++;
    }
  }
  PLogFlops(10*a->nz - 5*a->n);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5"
int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
  int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
  int         n,i,jrow,j;

  PetscFunctionBegin;
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  x    = x + shift;    /* shift for Fortran start by 1 indexing */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  v    += shift; /* shift for Fortran start by 1 indexing */
  idx  += shift;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    sum1  = 0.0;
    sum2  = 0.0;
    sum3  = 0.0;
    sum4  = 0.0;
    sum5  = 0.0;
    for (j=0; j<n; j++) {
      sum1 += v[jrow]*x[5*idx[jrow]];
      sum2 += v[jrow]*x[5*idx[jrow]+1];
      sum3 += v[jrow]*x[5*idx[jrow]+2];
      sum4 += v[jrow]*x[5*idx[jrow]+3];
      sum5 += v[jrow]*x[5*idx[jrow]+4];
      jrow++;
     }
    y[5*i]   += sum1;
    y[5*i+1] += sum2;
    y[5*i+2] += sum3;
    y[5*i+3] += sum4;
    y[5*i+4] += sum5;
  }

  PLogFlops(10*a->nz);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5"
int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
  Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
  int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;

  PetscFunctionBegin; 
  if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  y = y + shift; /* shift for Fortran start by 1 indexing */
  for (i=0; i<m; i++) {
    idx    = a->j + a->i[i] + shift;
    v      = a->a + a->i[i] + shift;
    n      = a->i[i+1] - a->i[i];
    alpha1 = x[5*i];
    alpha2 = x[5*i+1];
    alpha3 = x[5*i+2];
    alpha4 = x[5*i+3];
    alpha5 = x[5*i+4];
    while (n-->0) {
      y[5*(*idx)]   += alpha1*(*v);
      y[5*(*idx)+1] += alpha2*(*v);
      y[5*(*idx)+2] += alpha3*(*v);
      y[5*(*idx)+3] += alpha4*(*v);
      y[5*(*idx)+4] += alpha5*(*v);
      idx++; v++;
    }
  }
  PLogFlops(10*a->nz);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*===================================================================================*/
EXTERN int MatMult_MPIAIJ(Mat,Vec,Vec);
EXTERN int MatMultAdd_MPIAIJ(Mat,Vec,Vec,Vec);
EXTERN int MatMultTranspose_MPIAIJ(Mat,Vec,Vec);
EXTERN int MatMultTransposeAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_MPIMAIJ_1"></a>*/"MatMult_MPIMAIJ_1"
int MatMult_MPIMAIJ_1(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMult_MPIAIJ(b->AIJ,xx,yy);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_1"></a>*/"MatMultTranspose_MPIMAIJ_1"
int MatMultTranspose_MPIMAIJ_1(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultTranspose_MPIAIJ(b->AIJ,xx,yy);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_1"></a>*/"MatMultAdd_MPIMAIJ_1"
int MatMultAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultAdd_MPIAIJ(b->AIJ,xx,yy,zz);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_1"></a>*/"MatMultTransposeAdd_MPIMAIJ_1"
int MatMultTransposeAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;
  ierr = MatMultTransposeAdd_MPIAIJ(b->AIJ,xx,yy,zz);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------- */
#undef __FUNC__  
#define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof"
int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
{
  Mat_MAIJ *b = (Mat_MAIJ*)A->data;
  int      ierr;
  PetscFunctionBegin;

  /* start the scatter */
  ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
  ierr = MatMult_SeqMAIJ_2(A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqMAIJ_1(A,b->w,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------- */
#undef __FUNC__  
#define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 
int MatCreateMAIJ(Mat A,int dof,Mat *maij)
{
  int      ierr,size,n;
  Mat_MAIJ *b;
  Mat      B;

  PetscFunctionBegin;
  ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMAIJ);CHKERRQ(ierr);

  B->assembled    = PETSC_TRUE;
  B->ops->destroy = MatDestroy_MAIJ;
  b = (Mat_MAIJ*)B->data;

  b->AIJ = A;
  b->dof = dof;
  ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    if (dof == 1) {
      B->ops->mult             = MatMult_SeqMAIJ_1;
      B->ops->multadd          = MatMultAdd_SeqMAIJ_1;
      B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_1;
      B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_1;
    } else if (dof == 2) {
      B->ops->mult             = MatMult_SeqMAIJ_2;
      B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
      B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
      B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
    } else if (dof == 3) {
      B->ops->mult             = MatMult_SeqMAIJ_3;
      B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
      B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
      B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
    } else if (dof == 4) {
      B->ops->mult             = MatMult_SeqMAIJ_4;
      B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
      B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
      B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
    } else if (dof == 5) {
      B->ops->mult             = MatMult_SeqMAIJ_5;
      B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
      B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
      B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
    } else {
      SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof);
    }
  } else {
    if (dof == 1) {
      B->ops->mult             = MatMult_MPIMAIJ_1;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_1;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_1;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_1;
    } else {
      Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
      IS         from,to;
      Vec        gvec;
      int        *garray,i;

      ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);

      /* create two temporary Index sets for build scatter gather */
      garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray);
      for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
      ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
      ierr = PetscFree(garray);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);

      /* create temporary global vector to generate scatter context */
      ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);

      /* generate the scatter context */
      ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);

      ierr = ISDestroy(from);CHKERRQ(ierr);
      ierr = ISDestroy(to);CHKERRQ(ierr);
      ierr = VecDestroy(gvec);CHKERRQ(ierr);

      B->ops->mult = MatMult_MPIMAIJ_dof;
    }
  }
  *maij = B;
  PetscFunctionReturn(0);
}












