xref: /petsc/src/mat/impls/maij/maij.c (revision d8d19677bbccf95218448bee62e6b87f4513e133)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38ff585edeSJed Brown .seealso: MatCreateMAIJ()
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
44b9b97703SBarry Smith 
45b9b97703SBarry Smith   PetscFunctionBegin;
46251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48b9b97703SBarry Smith   if (ismpimaij) {
49b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50b9b97703SBarry Smith 
51b9b97703SBarry Smith     *B = b->A;
52b9b97703SBarry Smith   } else if (isseqmaij) {
53b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith     *B = b->AIJ;
56b9b97703SBarry Smith   } else {
57b9b97703SBarry Smith     *B = A;
58b9b97703SBarry Smith   }
59b9b97703SBarry Smith   PetscFunctionReturn(0);
60b9b97703SBarry Smith }
61b9b97703SBarry Smith 
62480dffcfSJed Brown /*@
63ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64ff585edeSJed Brown 
653f9fe445SBarry Smith    Logically Collective
66ff585edeSJed Brown 
67*d8d19677SJose E. Roman    Input Parameters:
68ff585edeSJed Brown +  A - the MAIJ matrix
69ff585edeSJed Brown -  dof - the block size for the new matrix
70ff585edeSJed Brown 
71ff585edeSJed Brown    Output Parameter:
72ff585edeSJed Brown .  B - the new MAIJ matrix
73ff585edeSJed Brown 
74ff585edeSJed Brown    Level: advanced
75ff585edeSJed Brown 
76ff585edeSJed Brown .seealso: MatCreateMAIJ()
77ff585edeSJed Brown @*/
787087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79b9b97703SBarry Smith {
80dfbe8321SBarry Smith   PetscErrorCode ierr;
810298fd71SBarry Smith   Mat            Aij = NULL;
82b9b97703SBarry Smith 
83b9b97703SBarry Smith   PetscFunctionBegin;
84c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
85b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87b9b97703SBarry Smith   PetscFunctionReturn(0);
88b9b97703SBarry Smith }
89b9b97703SBarry Smith 
90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9182b1193eSBarry Smith {
92dfbe8321SBarry Smith   PetscErrorCode ierr;
934cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9482b1193eSBarry Smith 
9582b1193eSBarry Smith   PetscFunctionBegin;
966bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
98bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
994222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
1004cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1014cb9d9b8SBarry Smith }
1024cb9d9b8SBarry Smith 
103356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
104356c569eSBarry Smith {
105356c569eSBarry Smith   PetscFunctionBegin;
106ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107356c569eSBarry Smith }
108356c569eSBarry Smith 
1090fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1100fd73130SBarry Smith {
1110fd73130SBarry Smith   PetscErrorCode ierr;
1120fd73130SBarry Smith   Mat            B;
1130fd73130SBarry Smith 
1140fd73130SBarry Smith   PetscFunctionBegin;
115a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1160fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1176bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1180fd73130SBarry Smith   PetscFunctionReturn(0);
1190fd73130SBarry Smith }
1200fd73130SBarry Smith 
1210fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1220fd73130SBarry Smith {
1230fd73130SBarry Smith   PetscErrorCode ierr;
1240fd73130SBarry Smith   Mat            B;
1250fd73130SBarry Smith 
1260fd73130SBarry Smith   PetscFunctionBegin;
127a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1280fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1296bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1300fd73130SBarry Smith   PetscFunctionReturn(0);
1310fd73130SBarry Smith }
1320fd73130SBarry Smith 
133dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1344cb9d9b8SBarry Smith {
135dfbe8321SBarry Smith   PetscErrorCode ierr;
1364cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1374cb9d9b8SBarry Smith 
1384cb9d9b8SBarry Smith   PetscFunctionBegin;
1396bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1406bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1416bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1436bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
144bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
145bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
1464222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
147f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
148b9b97703SBarry Smith   PetscFunctionReturn(0);
149b9b97703SBarry Smith }
150b9b97703SBarry Smith 
1510bad9183SKris Buschelman /*MC
152fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1530bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1540bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1550bad9183SKris Buschelman 
1560bad9183SKris Buschelman   Operations provided:
157a2b725a8SWilliam Gropp + MatMult
1580bad9183SKris Buschelman . MatMultTranspose
1590bad9183SKris Buschelman . MatMultAdd
160a2b725a8SWilliam Gropp - MatMultTransposeAdd
1610bad9183SKris Buschelman 
1620bad9183SKris Buschelman   Level: advanced
1630bad9183SKris Buschelman 
164b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1650bad9183SKris Buschelman M*/
1660bad9183SKris Buschelman 
1678cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
16882b1193eSBarry Smith {
169dfbe8321SBarry Smith   PetscErrorCode ierr;
1704cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17164b52464SHong Zhang   PetscMPIInt    size;
17282b1193eSBarry Smith 
17382b1193eSBarry Smith   PetscFunctionBegin;
174b00a9115SJed Brown   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
175b0a32e0cSBarry Smith   A->data  = (void*)b;
17626fbe8dcSKarl Rupp 
177cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
17826fbe8dcSKarl Rupp 
179356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
180f4a53059SBarry Smith 
181f4259b30SLisandro Dalcin   b->AIJ  = NULL;
182cd3bbe55SBarry Smith   b->dof  = 0;
183f4259b30SLisandro Dalcin   b->OAIJ = NULL;
184f4259b30SLisandro Dalcin   b->ctx  = NULL;
185f4259b30SLisandro Dalcin   b->w    = NULL;
18655b25c41SPierre Jolivet   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
18764b52464SHong Zhang   if (size == 1) {
18864b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
18964b52464SHong Zhang   } else {
19064b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19164b52464SHong Zhang   }
19232e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19332e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19482b1193eSBarry Smith   PetscFunctionReturn(0);
19582b1193eSBarry Smith }
19682b1193eSBarry Smith 
197cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
198dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
19982b1193eSBarry Smith {
2004cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
201bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
202d2840e09SBarry Smith   const PetscScalar *x,*v;
203d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
204dfbe8321SBarry Smith   PetscErrorCode    ierr;
205d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
206d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
20782b1193eSBarry Smith 
208bcc973b7SBarry Smith   PetscFunctionBegin;
2093649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2101ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
211bcc973b7SBarry Smith   idx  = a->j;
212bcc973b7SBarry Smith   v    = a->a;
213bcc973b7SBarry Smith   ii   = a->i;
214bcc973b7SBarry Smith 
215bcc973b7SBarry Smith   for (i=0; i<m; i++) {
216bcc973b7SBarry Smith     jrow  = ii[i];
217bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
218bcc973b7SBarry Smith     sum1  = 0.0;
219bcc973b7SBarry Smith     sum2  = 0.0;
22026fbe8dcSKarl Rupp 
221b3c51c66SMatthew Knepley     nonzerorow += (n>0);
222bcc973b7SBarry Smith     for (j=0; j<n; j++) {
223bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
224bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
225bcc973b7SBarry Smith       jrow++;
226bcc973b7SBarry Smith     }
227bcc973b7SBarry Smith     y[2*i]   = sum1;
228bcc973b7SBarry Smith     y[2*i+1] = sum2;
229bcc973b7SBarry Smith   }
230bcc973b7SBarry Smith 
231dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2323649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2331ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23482b1193eSBarry Smith   PetscFunctionReturn(0);
23582b1193eSBarry Smith }
236bcc973b7SBarry Smith 
237dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
23882b1193eSBarry Smith {
2394cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
240bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
241d2840e09SBarry Smith   const PetscScalar *x,*v;
242d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
243dfbe8321SBarry Smith   PetscErrorCode    ierr;
244d2840e09SBarry Smith   PetscInt          n,i;
245d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
24682b1193eSBarry Smith 
247bcc973b7SBarry Smith   PetscFunctionBegin;
248d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2501ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
251bfec09a0SHong Zhang 
252bcc973b7SBarry Smith   for (i=0; i<m; i++) {
253bfec09a0SHong Zhang     idx    = a->j + a->i[i];
254bfec09a0SHong Zhang     v      = a->a + a->i[i];
255bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
256bcc973b7SBarry Smith     alpha1 = x[2*i];
257bcc973b7SBarry Smith     alpha2 = x[2*i+1];
25826fbe8dcSKarl Rupp     while (n-->0) {
25926fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
26026fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
26126fbe8dcSKarl Rupp       idx++; v++;
26226fbe8dcSKarl Rupp     }
263bcc973b7SBarry Smith   }
264dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2661ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
26782b1193eSBarry Smith   PetscFunctionReturn(0);
26882b1193eSBarry Smith }
269bcc973b7SBarry Smith 
270dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27182b1193eSBarry Smith {
2724cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
273bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
274d2840e09SBarry Smith   const PetscScalar *x,*v;
275d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
276dfbe8321SBarry Smith   PetscErrorCode    ierr;
277b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
278d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27982b1193eSBarry Smith 
280bcc973b7SBarry Smith   PetscFunctionBegin;
281f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
284bcc973b7SBarry Smith   idx  = a->j;
285bcc973b7SBarry Smith   v    = a->a;
286bcc973b7SBarry Smith   ii   = a->i;
287bcc973b7SBarry Smith 
288bcc973b7SBarry Smith   for (i=0; i<m; i++) {
289bcc973b7SBarry Smith     jrow = ii[i];
290bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
291bcc973b7SBarry Smith     sum1 = 0.0;
292bcc973b7SBarry Smith     sum2 = 0.0;
293bcc973b7SBarry Smith     for (j=0; j<n; j++) {
294bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
295bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
296bcc973b7SBarry Smith       jrow++;
297bcc973b7SBarry Smith     }
298bcc973b7SBarry Smith     y[2*i]   += sum1;
299bcc973b7SBarry Smith     y[2*i+1] += sum2;
300bcc973b7SBarry Smith   }
301bcc973b7SBarry Smith 
302dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3041ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
305bcc973b7SBarry Smith   PetscFunctionReturn(0);
30682b1193eSBarry Smith }
307dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
30882b1193eSBarry Smith {
3094cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
310bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
311d2840e09SBarry Smith   const PetscScalar *x,*v;
312d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
313dfbe8321SBarry Smith   PetscErrorCode    ierr;
314d2840e09SBarry Smith   PetscInt          n,i;
315d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
31682b1193eSBarry Smith 
317bcc973b7SBarry Smith   PetscFunctionBegin;
318f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3201ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
321bfec09a0SHong Zhang 
322bcc973b7SBarry Smith   for (i=0; i<m; i++) {
323bfec09a0SHong Zhang     idx    = a->j + a->i[i];
324bfec09a0SHong Zhang     v      = a->a + a->i[i];
325bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
326bcc973b7SBarry Smith     alpha1 = x[2*i];
327bcc973b7SBarry Smith     alpha2 = x[2*i+1];
32826fbe8dcSKarl Rupp     while (n-->0) {
32926fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
33026fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
33126fbe8dcSKarl Rupp       idx++; v++;
33226fbe8dcSKarl Rupp     }
333bcc973b7SBarry Smith   }
334dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3361ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
337bcc973b7SBarry Smith   PetscFunctionReturn(0);
33882b1193eSBarry Smith }
339cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
340dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341bcc973b7SBarry Smith {
3424cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
343bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
344d2840e09SBarry Smith   const PetscScalar *x,*v;
345d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
346dfbe8321SBarry Smith   PetscErrorCode    ierr;
347d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
348d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
34982b1193eSBarry Smith 
350bcc973b7SBarry Smith   PetscFunctionBegin;
3513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3521ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
353bcc973b7SBarry Smith   idx  = a->j;
354bcc973b7SBarry Smith   v    = a->a;
355bcc973b7SBarry Smith   ii   = a->i;
356bcc973b7SBarry Smith 
357bcc973b7SBarry Smith   for (i=0; i<m; i++) {
358bcc973b7SBarry Smith     jrow  = ii[i];
359bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
360bcc973b7SBarry Smith     sum1  = 0.0;
361bcc973b7SBarry Smith     sum2  = 0.0;
362bcc973b7SBarry Smith     sum3  = 0.0;
36326fbe8dcSKarl Rupp 
364b3c51c66SMatthew Knepley     nonzerorow += (n>0);
365bcc973b7SBarry Smith     for (j=0; j<n; j++) {
366bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
367bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
368bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
369bcc973b7SBarry Smith       jrow++;
370bcc973b7SBarry Smith      }
371bcc973b7SBarry Smith     y[3*i]   = sum1;
372bcc973b7SBarry Smith     y[3*i+1] = sum2;
373bcc973b7SBarry Smith     y[3*i+2] = sum3;
374bcc973b7SBarry Smith   }
375bcc973b7SBarry Smith 
376dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3773649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3781ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
379bcc973b7SBarry Smith   PetscFunctionReturn(0);
380bcc973b7SBarry Smith }
381bcc973b7SBarry Smith 
382dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383bcc973b7SBarry Smith {
3844cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
385bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
386d2840e09SBarry Smith   const PetscScalar *x,*v;
387d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
388dfbe8321SBarry Smith   PetscErrorCode    ierr;
389d2840e09SBarry Smith   PetscInt          n,i;
390d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
391bcc973b7SBarry Smith 
392bcc973b7SBarry Smith   PetscFunctionBegin;
393d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3951ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
396bfec09a0SHong Zhang 
397bcc973b7SBarry Smith   for (i=0; i<m; i++) {
398bfec09a0SHong Zhang     idx    = a->j + a->i[i];
399bfec09a0SHong Zhang     v      = a->a + a->i[i];
400bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
401bcc973b7SBarry Smith     alpha1 = x[3*i];
402bcc973b7SBarry Smith     alpha2 = x[3*i+1];
403bcc973b7SBarry Smith     alpha3 = x[3*i+2];
404bcc973b7SBarry Smith     while (n-->0) {
405bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
406bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
407bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
408bcc973b7SBarry Smith       idx++; v++;
409bcc973b7SBarry Smith     }
410bcc973b7SBarry Smith   }
411dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4131ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414bcc973b7SBarry Smith   PetscFunctionReturn(0);
415bcc973b7SBarry Smith }
416bcc973b7SBarry Smith 
417dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418bcc973b7SBarry Smith {
4194cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
420bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
421d2840e09SBarry Smith   const PetscScalar *x,*v;
422d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
423dfbe8321SBarry Smith   PetscErrorCode    ierr;
424b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
425d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
426bcc973b7SBarry Smith 
427bcc973b7SBarry Smith   PetscFunctionBegin;
428f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4301ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
431bcc973b7SBarry Smith   idx  = a->j;
432bcc973b7SBarry Smith   v    = a->a;
433bcc973b7SBarry Smith   ii   = a->i;
434bcc973b7SBarry Smith 
435bcc973b7SBarry Smith   for (i=0; i<m; i++) {
436bcc973b7SBarry Smith     jrow = ii[i];
437bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
438bcc973b7SBarry Smith     sum1 = 0.0;
439bcc973b7SBarry Smith     sum2 = 0.0;
440bcc973b7SBarry Smith     sum3 = 0.0;
441bcc973b7SBarry Smith     for (j=0; j<n; j++) {
442bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
443bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
444bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
445bcc973b7SBarry Smith       jrow++;
446bcc973b7SBarry Smith     }
447bcc973b7SBarry Smith     y[3*i]   += sum1;
448bcc973b7SBarry Smith     y[3*i+1] += sum2;
449bcc973b7SBarry Smith     y[3*i+2] += sum3;
450bcc973b7SBarry Smith   }
451bcc973b7SBarry Smith 
452dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
455bcc973b7SBarry Smith   PetscFunctionReturn(0);
456bcc973b7SBarry Smith }
457dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458bcc973b7SBarry Smith {
4594cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
460bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
461d2840e09SBarry Smith   const PetscScalar *x,*v;
462d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
463dfbe8321SBarry Smith   PetscErrorCode    ierr;
464d2840e09SBarry Smith   PetscInt          n,i;
465d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith   PetscFunctionBegin;
468f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4693649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4701ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
471bcc973b7SBarry Smith   for (i=0; i<m; i++) {
472bfec09a0SHong Zhang     idx    = a->j + a->i[i];
473bfec09a0SHong Zhang     v      = a->a + a->i[i];
474bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
475bcc973b7SBarry Smith     alpha1 = x[3*i];
476bcc973b7SBarry Smith     alpha2 = x[3*i+1];
477bcc973b7SBarry Smith     alpha3 = x[3*i+2];
478bcc973b7SBarry Smith     while (n-->0) {
479bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
480bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
481bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
482bcc973b7SBarry Smith       idx++; v++;
483bcc973b7SBarry Smith     }
484bcc973b7SBarry Smith   }
485dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4863649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
488bcc973b7SBarry Smith   PetscFunctionReturn(0);
489bcc973b7SBarry Smith }
490bcc973b7SBarry Smith 
491bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
492dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493bcc973b7SBarry Smith {
4944cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
495bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
496d2840e09SBarry Smith   const PetscScalar *x,*v;
497d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
498dfbe8321SBarry Smith   PetscErrorCode    ierr;
499d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
500d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
501bcc973b7SBarry Smith 
502bcc973b7SBarry Smith   PetscFunctionBegin;
5033649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5041ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
505bcc973b7SBarry Smith   idx  = a->j;
506bcc973b7SBarry Smith   v    = a->a;
507bcc973b7SBarry Smith   ii   = a->i;
508bcc973b7SBarry Smith 
509bcc973b7SBarry Smith   for (i=0; i<m; i++) {
510bcc973b7SBarry Smith     jrow        = ii[i];
511bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
512bcc973b7SBarry Smith     sum1        = 0.0;
513bcc973b7SBarry Smith     sum2        = 0.0;
514bcc973b7SBarry Smith     sum3        = 0.0;
515bcc973b7SBarry Smith     sum4        = 0.0;
516b3c51c66SMatthew Knepley     nonzerorow += (n>0);
517bcc973b7SBarry Smith     for (j=0; j<n; j++) {
518bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
519bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
520bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
521bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
522bcc973b7SBarry Smith       jrow++;
523bcc973b7SBarry Smith     }
524bcc973b7SBarry Smith     y[4*i]   = sum1;
525bcc973b7SBarry Smith     y[4*i+1] = sum2;
526bcc973b7SBarry Smith     y[4*i+2] = sum3;
527bcc973b7SBarry Smith     y[4*i+3] = sum4;
528bcc973b7SBarry Smith   }
529bcc973b7SBarry Smith 
530dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5313649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5321ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533bcc973b7SBarry Smith   PetscFunctionReturn(0);
534bcc973b7SBarry Smith }
535bcc973b7SBarry Smith 
536dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537bcc973b7SBarry Smith {
5384cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
539bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540d2840e09SBarry Smith   const PetscScalar *x,*v;
541d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
542dfbe8321SBarry Smith   PetscErrorCode    ierr;
543d2840e09SBarry Smith   PetscInt          n,i;
544d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
545bcc973b7SBarry Smith 
546bcc973b7SBarry Smith   PetscFunctionBegin;
547d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5483649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5491ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
550bcc973b7SBarry Smith   for (i=0; i<m; i++) {
551bfec09a0SHong Zhang     idx    = a->j + a->i[i];
552bfec09a0SHong Zhang     v      = a->a + a->i[i];
553bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
554bcc973b7SBarry Smith     alpha1 = x[4*i];
555bcc973b7SBarry Smith     alpha2 = x[4*i+1];
556bcc973b7SBarry Smith     alpha3 = x[4*i+2];
557bcc973b7SBarry Smith     alpha4 = x[4*i+3];
558bcc973b7SBarry Smith     while (n-->0) {
559bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
560bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
561bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
562bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
563bcc973b7SBarry Smith       idx++; v++;
564bcc973b7SBarry Smith     }
565bcc973b7SBarry Smith   }
566dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5681ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
569bcc973b7SBarry Smith   PetscFunctionReturn(0);
570bcc973b7SBarry Smith }
571bcc973b7SBarry Smith 
572dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573bcc973b7SBarry Smith {
5744cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576d2840e09SBarry Smith   const PetscScalar *x,*v;
577d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
578dfbe8321SBarry Smith   PetscErrorCode    ierr;
579b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
580d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
581f9fae5adSBarry Smith 
582f9fae5adSBarry Smith   PetscFunctionBegin;
583f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
586f9fae5adSBarry Smith   idx  = a->j;
587f9fae5adSBarry Smith   v    = a->a;
588f9fae5adSBarry Smith   ii   = a->i;
589f9fae5adSBarry Smith 
590f9fae5adSBarry Smith   for (i=0; i<m; i++) {
591f9fae5adSBarry Smith     jrow = ii[i];
592f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
593f9fae5adSBarry Smith     sum1 = 0.0;
594f9fae5adSBarry Smith     sum2 = 0.0;
595f9fae5adSBarry Smith     sum3 = 0.0;
596f9fae5adSBarry Smith     sum4 = 0.0;
597f9fae5adSBarry Smith     for (j=0; j<n; j++) {
598f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
599f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
600f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
601f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
602f9fae5adSBarry Smith       jrow++;
603f9fae5adSBarry Smith     }
604f9fae5adSBarry Smith     y[4*i]   += sum1;
605f9fae5adSBarry Smith     y[4*i+1] += sum2;
606f9fae5adSBarry Smith     y[4*i+2] += sum3;
607f9fae5adSBarry Smith     y[4*i+3] += sum4;
608f9fae5adSBarry Smith   }
609f9fae5adSBarry Smith 
610dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6113649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6121ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
613f9fae5adSBarry Smith   PetscFunctionReturn(0);
614bcc973b7SBarry Smith }
615dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616bcc973b7SBarry Smith {
6174cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
618f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
619d2840e09SBarry Smith   const PetscScalar *x,*v;
620d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
621dfbe8321SBarry Smith   PetscErrorCode    ierr;
622d2840e09SBarry Smith   PetscInt          n,i;
623d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
624f9fae5adSBarry Smith 
625f9fae5adSBarry Smith   PetscFunctionBegin;
626f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6273649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6281ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
629bfec09a0SHong Zhang 
630f9fae5adSBarry Smith   for (i=0; i<m; i++) {
631bfec09a0SHong Zhang     idx    = a->j + a->i[i];
632bfec09a0SHong Zhang     v      = a->a + a->i[i];
633f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
634f9fae5adSBarry Smith     alpha1 = x[4*i];
635f9fae5adSBarry Smith     alpha2 = x[4*i+1];
636f9fae5adSBarry Smith     alpha3 = x[4*i+2];
637f9fae5adSBarry Smith     alpha4 = x[4*i+3];
638f9fae5adSBarry Smith     while (n-->0) {
639f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
640f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
641f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
642f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
643f9fae5adSBarry Smith       idx++; v++;
644f9fae5adSBarry Smith     }
645f9fae5adSBarry Smith   }
646dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6473649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6481ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
649f9fae5adSBarry Smith   PetscFunctionReturn(0);
650f9fae5adSBarry Smith }
651f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6526cd98798SBarry Smith 
653dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654f9fae5adSBarry Smith {
6554cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
656f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
657d2840e09SBarry Smith   const PetscScalar *x,*v;
658d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
659dfbe8321SBarry Smith   PetscErrorCode    ierr;
660d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
661d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
662f9fae5adSBarry Smith 
663f9fae5adSBarry Smith   PetscFunctionBegin;
6643649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6651ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
666f9fae5adSBarry Smith   idx  = a->j;
667f9fae5adSBarry Smith   v    = a->a;
668f9fae5adSBarry Smith   ii   = a->i;
669f9fae5adSBarry Smith 
670f9fae5adSBarry Smith   for (i=0; i<m; i++) {
671f9fae5adSBarry Smith     jrow  = ii[i];
672f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
673f9fae5adSBarry Smith     sum1  = 0.0;
674f9fae5adSBarry Smith     sum2  = 0.0;
675f9fae5adSBarry Smith     sum3  = 0.0;
676f9fae5adSBarry Smith     sum4  = 0.0;
677f9fae5adSBarry Smith     sum5  = 0.0;
67826fbe8dcSKarl Rupp 
679b3c51c66SMatthew Knepley     nonzerorow += (n>0);
680f9fae5adSBarry Smith     for (j=0; j<n; j++) {
681f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
682f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
683f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
684f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
685f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
686f9fae5adSBarry Smith       jrow++;
687f9fae5adSBarry Smith     }
688f9fae5adSBarry Smith     y[5*i]   = sum1;
689f9fae5adSBarry Smith     y[5*i+1] = sum2;
690f9fae5adSBarry Smith     y[5*i+2] = sum3;
691f9fae5adSBarry Smith     y[5*i+3] = sum4;
692f9fae5adSBarry Smith     y[5*i+4] = sum5;
693f9fae5adSBarry Smith   }
694f9fae5adSBarry Smith 
695dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
6963649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6971ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
698f9fae5adSBarry Smith   PetscFunctionReturn(0);
699f9fae5adSBarry Smith }
700f9fae5adSBarry Smith 
701dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702f9fae5adSBarry Smith {
7034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
704f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
705d2840e09SBarry Smith   const PetscScalar *x,*v;
706d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707dfbe8321SBarry Smith   PetscErrorCode    ierr;
708d2840e09SBarry Smith   PetscInt          n,i;
709d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
710f9fae5adSBarry Smith 
711f9fae5adSBarry Smith   PetscFunctionBegin;
712d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7141ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
715bfec09a0SHong Zhang 
716f9fae5adSBarry Smith   for (i=0; i<m; i++) {
717bfec09a0SHong Zhang     idx    = a->j + a->i[i];
718bfec09a0SHong Zhang     v      = a->a + a->i[i];
719f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
720f9fae5adSBarry Smith     alpha1 = x[5*i];
721f9fae5adSBarry Smith     alpha2 = x[5*i+1];
722f9fae5adSBarry Smith     alpha3 = x[5*i+2];
723f9fae5adSBarry Smith     alpha4 = x[5*i+3];
724f9fae5adSBarry Smith     alpha5 = x[5*i+4];
725f9fae5adSBarry Smith     while (n-->0) {
726f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
727f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
728f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
729f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
730f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
731f9fae5adSBarry Smith       idx++; v++;
732f9fae5adSBarry Smith     }
733f9fae5adSBarry Smith   }
734dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7361ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
737f9fae5adSBarry Smith   PetscFunctionReturn(0);
738f9fae5adSBarry Smith }
739f9fae5adSBarry Smith 
740dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741f9fae5adSBarry Smith {
7424cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
743f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
744d2840e09SBarry Smith   const PetscScalar *x,*v;
745d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
746dfbe8321SBarry Smith   PetscErrorCode    ierr;
747b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
748d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
749f9fae5adSBarry Smith 
750f9fae5adSBarry Smith   PetscFunctionBegin;
751f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
754f9fae5adSBarry Smith   idx  = a->j;
755f9fae5adSBarry Smith   v    = a->a;
756f9fae5adSBarry Smith   ii   = a->i;
757f9fae5adSBarry Smith 
758f9fae5adSBarry Smith   for (i=0; i<m; i++) {
759f9fae5adSBarry Smith     jrow = ii[i];
760f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
761f9fae5adSBarry Smith     sum1 = 0.0;
762f9fae5adSBarry Smith     sum2 = 0.0;
763f9fae5adSBarry Smith     sum3 = 0.0;
764f9fae5adSBarry Smith     sum4 = 0.0;
765f9fae5adSBarry Smith     sum5 = 0.0;
766f9fae5adSBarry Smith     for (j=0; j<n; j++) {
767f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
768f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
769f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
770f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
771f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
772f9fae5adSBarry Smith       jrow++;
773f9fae5adSBarry Smith     }
774f9fae5adSBarry Smith     y[5*i]   += sum1;
775f9fae5adSBarry Smith     y[5*i+1] += sum2;
776f9fae5adSBarry Smith     y[5*i+2] += sum3;
777f9fae5adSBarry Smith     y[5*i+3] += sum4;
778f9fae5adSBarry Smith     y[5*i+4] += sum5;
779f9fae5adSBarry Smith   }
780f9fae5adSBarry Smith 
781dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7823649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
784f9fae5adSBarry Smith   PetscFunctionReturn(0);
785f9fae5adSBarry Smith }
786f9fae5adSBarry Smith 
787dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788f9fae5adSBarry Smith {
7894cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791d2840e09SBarry Smith   const PetscScalar *x,*v;
792d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793dfbe8321SBarry Smith   PetscErrorCode    ierr;
794d2840e09SBarry Smith   PetscInt          n,i;
795d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
796f9fae5adSBarry Smith 
797f9fae5adSBarry Smith   PetscFunctionBegin;
798f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7993649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8001ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
801bfec09a0SHong Zhang 
802f9fae5adSBarry Smith   for (i=0; i<m; i++) {
803bfec09a0SHong Zhang     idx    = a->j + a->i[i];
804bfec09a0SHong Zhang     v      = a->a + a->i[i];
805f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
806f9fae5adSBarry Smith     alpha1 = x[5*i];
807f9fae5adSBarry Smith     alpha2 = x[5*i+1];
808f9fae5adSBarry Smith     alpha3 = x[5*i+2];
809f9fae5adSBarry Smith     alpha4 = x[5*i+3];
810f9fae5adSBarry Smith     alpha5 = x[5*i+4];
811f9fae5adSBarry Smith     while (n-->0) {
812f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
813f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
814f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
815f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
816f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
817f9fae5adSBarry Smith       idx++; v++;
818f9fae5adSBarry Smith     }
819f9fae5adSBarry Smith   }
820dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8213649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
823f9fae5adSBarry Smith   PetscFunctionReturn(0);
824bcc973b7SBarry Smith }
825bcc973b7SBarry Smith 
8266cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
827dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8286cd98798SBarry Smith {
8296cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8306cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
831d2840e09SBarry Smith   const PetscScalar *x,*v;
832d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
833dfbe8321SBarry Smith   PetscErrorCode    ierr;
834d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
835d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8366cd98798SBarry Smith 
8376cd98798SBarry Smith   PetscFunctionBegin;
8383649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8391ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8406cd98798SBarry Smith   idx  = a->j;
8416cd98798SBarry Smith   v    = a->a;
8426cd98798SBarry Smith   ii   = a->i;
8436cd98798SBarry Smith 
8446cd98798SBarry Smith   for (i=0; i<m; i++) {
8456cd98798SBarry Smith     jrow  = ii[i];
8466cd98798SBarry Smith     n     = ii[i+1] - jrow;
8476cd98798SBarry Smith     sum1  = 0.0;
8486cd98798SBarry Smith     sum2  = 0.0;
8496cd98798SBarry Smith     sum3  = 0.0;
8506cd98798SBarry Smith     sum4  = 0.0;
8516cd98798SBarry Smith     sum5  = 0.0;
8526cd98798SBarry Smith     sum6  = 0.0;
85326fbe8dcSKarl Rupp 
854b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8556cd98798SBarry Smith     for (j=0; j<n; j++) {
8566cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8576cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8586cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8596cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8606cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8616cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8626cd98798SBarry Smith       jrow++;
8636cd98798SBarry Smith     }
8646cd98798SBarry Smith     y[6*i]   = sum1;
8656cd98798SBarry Smith     y[6*i+1] = sum2;
8666cd98798SBarry Smith     y[6*i+2] = sum3;
8676cd98798SBarry Smith     y[6*i+3] = sum4;
8686cd98798SBarry Smith     y[6*i+4] = sum5;
8696cd98798SBarry Smith     y[6*i+5] = sum6;
8706cd98798SBarry Smith   }
8716cd98798SBarry Smith 
872dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8733649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8741ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8756cd98798SBarry Smith   PetscFunctionReturn(0);
8766cd98798SBarry Smith }
8776cd98798SBarry Smith 
878dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8796cd98798SBarry Smith {
8806cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8816cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882d2840e09SBarry Smith   const PetscScalar *x,*v;
883d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884dfbe8321SBarry Smith   PetscErrorCode    ierr;
885d2840e09SBarry Smith   PetscInt          n,i;
886d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8876cd98798SBarry Smith 
8886cd98798SBarry Smith   PetscFunctionBegin;
889d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
8903649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8911ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
892bfec09a0SHong Zhang 
8936cd98798SBarry Smith   for (i=0; i<m; i++) {
894bfec09a0SHong Zhang     idx    = a->j + a->i[i];
895bfec09a0SHong Zhang     v      = a->a + a->i[i];
8966cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8976cd98798SBarry Smith     alpha1 = x[6*i];
8986cd98798SBarry Smith     alpha2 = x[6*i+1];
8996cd98798SBarry Smith     alpha3 = x[6*i+2];
9006cd98798SBarry Smith     alpha4 = x[6*i+3];
9016cd98798SBarry Smith     alpha5 = x[6*i+4];
9026cd98798SBarry Smith     alpha6 = x[6*i+5];
9036cd98798SBarry Smith     while (n-->0) {
9046cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9056cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9066cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9076cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9086cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9096cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9106cd98798SBarry Smith       idx++; v++;
9116cd98798SBarry Smith     }
9126cd98798SBarry Smith   }
913dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9151ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9166cd98798SBarry Smith   PetscFunctionReturn(0);
9176cd98798SBarry Smith }
9186cd98798SBarry Smith 
919dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9206cd98798SBarry Smith {
9216cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9226cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
923d2840e09SBarry Smith   const PetscScalar *x,*v;
924d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
925dfbe8321SBarry Smith   PetscErrorCode    ierr;
926b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
927d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9286cd98798SBarry Smith 
9296cd98798SBarry Smith   PetscFunctionBegin;
9306cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9336cd98798SBarry Smith   idx  = a->j;
9346cd98798SBarry Smith   v    = a->a;
9356cd98798SBarry Smith   ii   = a->i;
9366cd98798SBarry Smith 
9376cd98798SBarry Smith   for (i=0; i<m; i++) {
9386cd98798SBarry Smith     jrow = ii[i];
9396cd98798SBarry Smith     n    = ii[i+1] - jrow;
9406cd98798SBarry Smith     sum1 = 0.0;
9416cd98798SBarry Smith     sum2 = 0.0;
9426cd98798SBarry Smith     sum3 = 0.0;
9436cd98798SBarry Smith     sum4 = 0.0;
9446cd98798SBarry Smith     sum5 = 0.0;
9456cd98798SBarry Smith     sum6 = 0.0;
9466cd98798SBarry Smith     for (j=0; j<n; j++) {
9476cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9486cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9496cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9506cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9516cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9526cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9536cd98798SBarry Smith       jrow++;
9546cd98798SBarry Smith     }
9556cd98798SBarry Smith     y[6*i]   += sum1;
9566cd98798SBarry Smith     y[6*i+1] += sum2;
9576cd98798SBarry Smith     y[6*i+2] += sum3;
9586cd98798SBarry Smith     y[6*i+3] += sum4;
9596cd98798SBarry Smith     y[6*i+4] += sum5;
9606cd98798SBarry Smith     y[6*i+5] += sum6;
9616cd98798SBarry Smith   }
9626cd98798SBarry Smith 
963dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9643649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9651ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9666cd98798SBarry Smith   PetscFunctionReturn(0);
9676cd98798SBarry Smith }
9686cd98798SBarry Smith 
969dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9706cd98798SBarry Smith {
9716cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9726cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
973d2840e09SBarry Smith   const PetscScalar *x,*v;
974d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975dfbe8321SBarry Smith   PetscErrorCode    ierr;
976d2840e09SBarry Smith   PetscInt          n,i;
977d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9786cd98798SBarry Smith 
9796cd98798SBarry Smith   PetscFunctionBegin;
9806cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9821ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
983bfec09a0SHong Zhang 
9846cd98798SBarry Smith   for (i=0; i<m; i++) {
985bfec09a0SHong Zhang     idx    = a->j + a->i[i];
986bfec09a0SHong Zhang     v      = a->a + a->i[i];
9876cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9886cd98798SBarry Smith     alpha1 = x[6*i];
9896cd98798SBarry Smith     alpha2 = x[6*i+1];
9906cd98798SBarry Smith     alpha3 = x[6*i+2];
9916cd98798SBarry Smith     alpha4 = x[6*i+3];
9926cd98798SBarry Smith     alpha5 = x[6*i+4];
9936cd98798SBarry Smith     alpha6 = x[6*i+5];
9946cd98798SBarry Smith     while (n-->0) {
9956cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9966cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9976cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9986cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9996cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10006cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10016cd98798SBarry Smith       idx++; v++;
10026cd98798SBarry Smith     }
10036cd98798SBarry Smith   }
1004dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10076cd98798SBarry Smith   PetscFunctionReturn(0);
10086cd98798SBarry Smith }
10096cd98798SBarry Smith 
101066ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
1011ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012ed8eea03SBarry Smith {
1013ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015d2840e09SBarry Smith   const PetscScalar *x,*v;
1016d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017ed8eea03SBarry Smith   PetscErrorCode    ierr;
1018d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1019d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1020ed8eea03SBarry Smith 
1021ed8eea03SBarry Smith   PetscFunctionBegin;
10223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1023ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1024ed8eea03SBarry Smith   idx  = a->j;
1025ed8eea03SBarry Smith   v    = a->a;
1026ed8eea03SBarry Smith   ii   = a->i;
1027ed8eea03SBarry Smith 
1028ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1029ed8eea03SBarry Smith     jrow  = ii[i];
1030ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1031ed8eea03SBarry Smith     sum1  = 0.0;
1032ed8eea03SBarry Smith     sum2  = 0.0;
1033ed8eea03SBarry Smith     sum3  = 0.0;
1034ed8eea03SBarry Smith     sum4  = 0.0;
1035ed8eea03SBarry Smith     sum5  = 0.0;
1036ed8eea03SBarry Smith     sum6  = 0.0;
1037ed8eea03SBarry Smith     sum7  = 0.0;
103826fbe8dcSKarl Rupp 
1039b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1040ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1041ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1042ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1043ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1044ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1045ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1046ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1047ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1048ed8eea03SBarry Smith       jrow++;
1049ed8eea03SBarry Smith     }
1050ed8eea03SBarry Smith     y[7*i]   = sum1;
1051ed8eea03SBarry Smith     y[7*i+1] = sum2;
1052ed8eea03SBarry Smith     y[7*i+2] = sum3;
1053ed8eea03SBarry Smith     y[7*i+3] = sum4;
1054ed8eea03SBarry Smith     y[7*i+4] = sum5;
1055ed8eea03SBarry Smith     y[7*i+5] = sum6;
1056ed8eea03SBarry Smith     y[7*i+6] = sum7;
1057ed8eea03SBarry Smith   }
1058ed8eea03SBarry Smith 
1059dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10603649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1061ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1062ed8eea03SBarry Smith   PetscFunctionReturn(0);
1063ed8eea03SBarry Smith }
1064ed8eea03SBarry Smith 
1065ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066ed8eea03SBarry Smith {
1067ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1068ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1069d2840e09SBarry Smith   const PetscScalar *x,*v;
1070d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071ed8eea03SBarry Smith   PetscErrorCode    ierr;
1072d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1073d2840e09SBarry Smith   PetscInt          n,i;
1074ed8eea03SBarry Smith 
1075ed8eea03SBarry Smith   PetscFunctionBegin;
1076d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
10773649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1078ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1079ed8eea03SBarry Smith 
1080ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1081ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1082ed8eea03SBarry Smith     v      = a->a + a->i[i];
1083ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1084ed8eea03SBarry Smith     alpha1 = x[7*i];
1085ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1086ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1087ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1088ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1089ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1090ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1091ed8eea03SBarry Smith     while (n-->0) {
1092ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1093ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1094ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1095ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1096ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1097ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1098ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1099ed8eea03SBarry Smith       idx++; v++;
1100ed8eea03SBarry Smith     }
1101ed8eea03SBarry Smith   }
1102dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1104ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1105ed8eea03SBarry Smith   PetscFunctionReturn(0);
1106ed8eea03SBarry Smith }
1107ed8eea03SBarry Smith 
1108ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109ed8eea03SBarry Smith {
1110ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1111ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1112d2840e09SBarry Smith   const PetscScalar *x,*v;
1113d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114ed8eea03SBarry Smith   PetscErrorCode    ierr;
1115d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1116ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1117ed8eea03SBarry Smith 
1118ed8eea03SBarry Smith   PetscFunctionBegin;
1119ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1121ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1122ed8eea03SBarry Smith   idx  = a->j;
1123ed8eea03SBarry Smith   v    = a->a;
1124ed8eea03SBarry Smith   ii   = a->i;
1125ed8eea03SBarry Smith 
1126ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1127ed8eea03SBarry Smith     jrow = ii[i];
1128ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1129ed8eea03SBarry Smith     sum1 = 0.0;
1130ed8eea03SBarry Smith     sum2 = 0.0;
1131ed8eea03SBarry Smith     sum3 = 0.0;
1132ed8eea03SBarry Smith     sum4 = 0.0;
1133ed8eea03SBarry Smith     sum5 = 0.0;
1134ed8eea03SBarry Smith     sum6 = 0.0;
1135ed8eea03SBarry Smith     sum7 = 0.0;
1136ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1137ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1138ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1139ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1140ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1141ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1142ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1143ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1144ed8eea03SBarry Smith       jrow++;
1145ed8eea03SBarry Smith     }
1146ed8eea03SBarry Smith     y[7*i]   += sum1;
1147ed8eea03SBarry Smith     y[7*i+1] += sum2;
1148ed8eea03SBarry Smith     y[7*i+2] += sum3;
1149ed8eea03SBarry Smith     y[7*i+3] += sum4;
1150ed8eea03SBarry Smith     y[7*i+4] += sum5;
1151ed8eea03SBarry Smith     y[7*i+5] += sum6;
1152ed8eea03SBarry Smith     y[7*i+6] += sum7;
1153ed8eea03SBarry Smith   }
1154ed8eea03SBarry Smith 
1155dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1157ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1158ed8eea03SBarry Smith   PetscFunctionReturn(0);
1159ed8eea03SBarry Smith }
1160ed8eea03SBarry Smith 
1161ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162ed8eea03SBarry Smith {
1163ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1164ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1165d2840e09SBarry Smith   const PetscScalar *x,*v;
1166d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167ed8eea03SBarry Smith   PetscErrorCode    ierr;
1168d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1169d2840e09SBarry Smith   PetscInt          n,i;
1170ed8eea03SBarry Smith 
1171ed8eea03SBarry Smith   PetscFunctionBegin;
1172ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1174ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1175ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1176ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1177ed8eea03SBarry Smith     v      = a->a + a->i[i];
1178ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1179ed8eea03SBarry Smith     alpha1 = x[7*i];
1180ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1181ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1182ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1183ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1184ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1185ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1186ed8eea03SBarry Smith     while (n-->0) {
1187ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1188ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1189ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1190ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1191ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1192ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1193ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1194ed8eea03SBarry Smith       idx++; v++;
1195ed8eea03SBarry Smith     }
1196ed8eea03SBarry Smith   }
1197dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11983649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1199ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1200ed8eea03SBarry Smith   PetscFunctionReturn(0);
1201ed8eea03SBarry Smith }
1202ed8eea03SBarry Smith 
1203dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
120466ed3db0SBarry Smith {
120566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
120666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1207d2840e09SBarry Smith   const PetscScalar *x,*v;
1208d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209dfbe8321SBarry Smith   PetscErrorCode    ierr;
1210d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1211d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
121266ed3db0SBarry Smith 
121366ed3db0SBarry Smith   PetscFunctionBegin;
12143649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12151ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
121666ed3db0SBarry Smith   idx  = a->j;
121766ed3db0SBarry Smith   v    = a->a;
121866ed3db0SBarry Smith   ii   = a->i;
121966ed3db0SBarry Smith 
122066ed3db0SBarry Smith   for (i=0; i<m; i++) {
122166ed3db0SBarry Smith     jrow  = ii[i];
122266ed3db0SBarry Smith     n     = ii[i+1] - jrow;
122366ed3db0SBarry Smith     sum1  = 0.0;
122466ed3db0SBarry Smith     sum2  = 0.0;
122566ed3db0SBarry Smith     sum3  = 0.0;
122666ed3db0SBarry Smith     sum4  = 0.0;
122766ed3db0SBarry Smith     sum5  = 0.0;
122866ed3db0SBarry Smith     sum6  = 0.0;
122966ed3db0SBarry Smith     sum7  = 0.0;
123066ed3db0SBarry Smith     sum8  = 0.0;
123126fbe8dcSKarl Rupp 
1232b3c51c66SMatthew Knepley     nonzerorow += (n>0);
123366ed3db0SBarry Smith     for (j=0; j<n; j++) {
123466ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
123566ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
123666ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
123766ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
123866ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
123966ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
124066ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
124166ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
124266ed3db0SBarry Smith       jrow++;
124366ed3db0SBarry Smith     }
124466ed3db0SBarry Smith     y[8*i]   = sum1;
124566ed3db0SBarry Smith     y[8*i+1] = sum2;
124666ed3db0SBarry Smith     y[8*i+2] = sum3;
124766ed3db0SBarry Smith     y[8*i+3] = sum4;
124866ed3db0SBarry Smith     y[8*i+4] = sum5;
124966ed3db0SBarry Smith     y[8*i+5] = sum6;
125066ed3db0SBarry Smith     y[8*i+6] = sum7;
125166ed3db0SBarry Smith     y[8*i+7] = sum8;
125266ed3db0SBarry Smith   }
125366ed3db0SBarry Smith 
1254dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12561ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
125766ed3db0SBarry Smith   PetscFunctionReturn(0);
125866ed3db0SBarry Smith }
125966ed3db0SBarry Smith 
1260dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126166ed3db0SBarry Smith {
126266ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
126366ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1264d2840e09SBarry Smith   const PetscScalar *x,*v;
1265d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266dfbe8321SBarry Smith   PetscErrorCode    ierr;
1267d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1268d2840e09SBarry Smith   PetscInt          n,i;
126966ed3db0SBarry Smith 
127066ed3db0SBarry Smith   PetscFunctionBegin;
1271d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
12723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1274bfec09a0SHong Zhang 
127566ed3db0SBarry Smith   for (i=0; i<m; i++) {
1276bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1277bfec09a0SHong Zhang     v      = a->a + a->i[i];
127866ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
127966ed3db0SBarry Smith     alpha1 = x[8*i];
128066ed3db0SBarry Smith     alpha2 = x[8*i+1];
128166ed3db0SBarry Smith     alpha3 = x[8*i+2];
128266ed3db0SBarry Smith     alpha4 = x[8*i+3];
128366ed3db0SBarry Smith     alpha5 = x[8*i+4];
128466ed3db0SBarry Smith     alpha6 = x[8*i+5];
128566ed3db0SBarry Smith     alpha7 = x[8*i+6];
128666ed3db0SBarry Smith     alpha8 = x[8*i+7];
128766ed3db0SBarry Smith     while (n-->0) {
128866ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
128966ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
129066ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
129166ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
129266ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
129366ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
129466ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
129566ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
129666ed3db0SBarry Smith       idx++; v++;
129766ed3db0SBarry Smith     }
129866ed3db0SBarry Smith   }
1299dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13011ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130266ed3db0SBarry Smith   PetscFunctionReturn(0);
130366ed3db0SBarry Smith }
130466ed3db0SBarry Smith 
1305dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
130666ed3db0SBarry Smith {
130766ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
130866ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1309d2840e09SBarry Smith   const PetscScalar *x,*v;
1310d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311dfbe8321SBarry Smith   PetscErrorCode    ierr;
1312d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1313b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
131466ed3db0SBarry Smith 
131566ed3db0SBarry Smith   PetscFunctionBegin;
131666ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
131966ed3db0SBarry Smith   idx  = a->j;
132066ed3db0SBarry Smith   v    = a->a;
132166ed3db0SBarry Smith   ii   = a->i;
132266ed3db0SBarry Smith 
132366ed3db0SBarry Smith   for (i=0; i<m; i++) {
132466ed3db0SBarry Smith     jrow = ii[i];
132566ed3db0SBarry Smith     n    = ii[i+1] - jrow;
132666ed3db0SBarry Smith     sum1 = 0.0;
132766ed3db0SBarry Smith     sum2 = 0.0;
132866ed3db0SBarry Smith     sum3 = 0.0;
132966ed3db0SBarry Smith     sum4 = 0.0;
133066ed3db0SBarry Smith     sum5 = 0.0;
133166ed3db0SBarry Smith     sum6 = 0.0;
133266ed3db0SBarry Smith     sum7 = 0.0;
133366ed3db0SBarry Smith     sum8 = 0.0;
133466ed3db0SBarry Smith     for (j=0; j<n; j++) {
133566ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
133666ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
133766ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
133866ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
133966ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
134066ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
134166ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
134266ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
134366ed3db0SBarry Smith       jrow++;
134466ed3db0SBarry Smith     }
134566ed3db0SBarry Smith     y[8*i]   += sum1;
134666ed3db0SBarry Smith     y[8*i+1] += sum2;
134766ed3db0SBarry Smith     y[8*i+2] += sum3;
134866ed3db0SBarry Smith     y[8*i+3] += sum4;
134966ed3db0SBarry Smith     y[8*i+4] += sum5;
135066ed3db0SBarry Smith     y[8*i+5] += sum6;
135166ed3db0SBarry Smith     y[8*i+6] += sum7;
135266ed3db0SBarry Smith     y[8*i+7] += sum8;
135366ed3db0SBarry Smith   }
135466ed3db0SBarry Smith 
1355dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13571ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
135866ed3db0SBarry Smith   PetscFunctionReturn(0);
135966ed3db0SBarry Smith }
136066ed3db0SBarry Smith 
1361dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136266ed3db0SBarry Smith {
136366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1365d2840e09SBarry Smith   const PetscScalar *x,*v;
1366d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367dfbe8321SBarry Smith   PetscErrorCode    ierr;
1368d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1369d2840e09SBarry Smith   PetscInt          n,i;
137066ed3db0SBarry Smith 
137166ed3db0SBarry Smith   PetscFunctionBegin;
137266ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13741ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137566ed3db0SBarry Smith   for (i=0; i<m; i++) {
1376bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1377bfec09a0SHong Zhang     v      = a->a + a->i[i];
137866ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
137966ed3db0SBarry Smith     alpha1 = x[8*i];
138066ed3db0SBarry Smith     alpha2 = x[8*i+1];
138166ed3db0SBarry Smith     alpha3 = x[8*i+2];
138266ed3db0SBarry Smith     alpha4 = x[8*i+3];
138366ed3db0SBarry Smith     alpha5 = x[8*i+4];
138466ed3db0SBarry Smith     alpha6 = x[8*i+5];
138566ed3db0SBarry Smith     alpha7 = x[8*i+6];
138666ed3db0SBarry Smith     alpha8 = x[8*i+7];
138766ed3db0SBarry Smith     while (n-->0) {
138866ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
138966ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
139066ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
139166ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
139266ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
139366ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
139466ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
139566ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
139666ed3db0SBarry Smith       idx++; v++;
139766ed3db0SBarry Smith     }
139866ed3db0SBarry Smith   }
1399dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14011ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14022f7816d4SBarry Smith   PetscFunctionReturn(0);
14032f7816d4SBarry Smith }
14042f7816d4SBarry Smith 
14052b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1406dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14072b692628SMatthew Knepley {
14082b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14092b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1410d2840e09SBarry Smith   const PetscScalar *x,*v;
1411d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412dfbe8321SBarry Smith   PetscErrorCode    ierr;
1413d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1414d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14152b692628SMatthew Knepley 
14162b692628SMatthew Knepley   PetscFunctionBegin;
14173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14181ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14192b692628SMatthew Knepley   idx  = a->j;
14202b692628SMatthew Knepley   v    = a->a;
14212b692628SMatthew Knepley   ii   = a->i;
14222b692628SMatthew Knepley 
14232b692628SMatthew Knepley   for (i=0; i<m; i++) {
14242b692628SMatthew Knepley     jrow  = ii[i];
14252b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14262b692628SMatthew Knepley     sum1  = 0.0;
14272b692628SMatthew Knepley     sum2  = 0.0;
14282b692628SMatthew Knepley     sum3  = 0.0;
14292b692628SMatthew Knepley     sum4  = 0.0;
14302b692628SMatthew Knepley     sum5  = 0.0;
14312b692628SMatthew Knepley     sum6  = 0.0;
14322b692628SMatthew Knepley     sum7  = 0.0;
14332b692628SMatthew Knepley     sum8  = 0.0;
14342b692628SMatthew Knepley     sum9  = 0.0;
143526fbe8dcSKarl Rupp 
1436b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14372b692628SMatthew Knepley     for (j=0; j<n; j++) {
14382b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14392b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14402b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14412b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14422b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14432b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14442b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14452b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14462b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14472b692628SMatthew Knepley       jrow++;
14482b692628SMatthew Knepley     }
14492b692628SMatthew Knepley     y[9*i]   = sum1;
14502b692628SMatthew Knepley     y[9*i+1] = sum2;
14512b692628SMatthew Knepley     y[9*i+2] = sum3;
14522b692628SMatthew Knepley     y[9*i+3] = sum4;
14532b692628SMatthew Knepley     y[9*i+4] = sum5;
14542b692628SMatthew Knepley     y[9*i+5] = sum6;
14552b692628SMatthew Knepley     y[9*i+6] = sum7;
14562b692628SMatthew Knepley     y[9*i+7] = sum8;
14572b692628SMatthew Knepley     y[9*i+8] = sum9;
14582b692628SMatthew Knepley   }
14592b692628SMatthew Knepley 
1460dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14621ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14632b692628SMatthew Knepley   PetscFunctionReturn(0);
14642b692628SMatthew Knepley }
14652b692628SMatthew Knepley 
1466b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1467b9cda22cSBarry Smith 
1468dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14692b692628SMatthew Knepley {
14702b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14712b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1472d2840e09SBarry Smith   const PetscScalar *x,*v;
1473d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474dfbe8321SBarry Smith   PetscErrorCode    ierr;
1475d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1476d2840e09SBarry Smith   PetscInt          n,i;
14772b692628SMatthew Knepley 
14782b692628SMatthew Knepley   PetscFunctionBegin;
1479d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
14803649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14811ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14822b692628SMatthew Knepley 
14832b692628SMatthew Knepley   for (i=0; i<m; i++) {
14842b692628SMatthew Knepley     idx    = a->j + a->i[i];
14852b692628SMatthew Knepley     v      = a->a + a->i[i];
14862b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14872b692628SMatthew Knepley     alpha1 = x[9*i];
14882b692628SMatthew Knepley     alpha2 = x[9*i+1];
14892b692628SMatthew Knepley     alpha3 = x[9*i+2];
14902b692628SMatthew Knepley     alpha4 = x[9*i+3];
14912b692628SMatthew Knepley     alpha5 = x[9*i+4];
14922b692628SMatthew Knepley     alpha6 = x[9*i+5];
14932b692628SMatthew Knepley     alpha7 = x[9*i+6];
14942b692628SMatthew Knepley     alpha8 = x[9*i+7];
14952f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14962b692628SMatthew Knepley     while (n-->0) {
14972b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14982b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14992b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15002b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15012b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15022b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15032b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15042b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15052b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15062b692628SMatthew Knepley       idx++; v++;
15072b692628SMatthew Knepley     }
15082b692628SMatthew Knepley   }
1509dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15103649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15111ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15122b692628SMatthew Knepley   PetscFunctionReturn(0);
15132b692628SMatthew Knepley }
15142b692628SMatthew Knepley 
1515dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15162b692628SMatthew Knepley {
15172b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15182b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1519d2840e09SBarry Smith   const PetscScalar *x,*v;
1520d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521dfbe8321SBarry Smith   PetscErrorCode    ierr;
1522d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1523b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15242b692628SMatthew Knepley 
15252b692628SMatthew Knepley   PetscFunctionBegin;
15262b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15273649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15281ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15292b692628SMatthew Knepley   idx  = a->j;
15302b692628SMatthew Knepley   v    = a->a;
15312b692628SMatthew Knepley   ii   = a->i;
15322b692628SMatthew Knepley 
15332b692628SMatthew Knepley   for (i=0; i<m; i++) {
15342b692628SMatthew Knepley     jrow = ii[i];
15352b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15362b692628SMatthew Knepley     sum1 = 0.0;
15372b692628SMatthew Knepley     sum2 = 0.0;
15382b692628SMatthew Knepley     sum3 = 0.0;
15392b692628SMatthew Knepley     sum4 = 0.0;
15402b692628SMatthew Knepley     sum5 = 0.0;
15412b692628SMatthew Knepley     sum6 = 0.0;
15422b692628SMatthew Knepley     sum7 = 0.0;
15432b692628SMatthew Knepley     sum8 = 0.0;
15442b692628SMatthew Knepley     sum9 = 0.0;
15452b692628SMatthew Knepley     for (j=0; j<n; j++) {
15462b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15472b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15482b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15492b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15502b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15512b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15522b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15532b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15542b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15552b692628SMatthew Knepley       jrow++;
15562b692628SMatthew Knepley     }
15572b692628SMatthew Knepley     y[9*i]   += sum1;
15582b692628SMatthew Knepley     y[9*i+1] += sum2;
15592b692628SMatthew Knepley     y[9*i+2] += sum3;
15602b692628SMatthew Knepley     y[9*i+3] += sum4;
15612b692628SMatthew Knepley     y[9*i+4] += sum5;
15622b692628SMatthew Knepley     y[9*i+5] += sum6;
15632b692628SMatthew Knepley     y[9*i+6] += sum7;
15642b692628SMatthew Knepley     y[9*i+7] += sum8;
15652b692628SMatthew Knepley     y[9*i+8] += sum9;
15662b692628SMatthew Knepley   }
15672b692628SMatthew Knepley 
1568ca0c957dSBarry Smith   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15693649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15701ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15712b692628SMatthew Knepley   PetscFunctionReturn(0);
15722b692628SMatthew Knepley }
15732b692628SMatthew Knepley 
1574dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15752b692628SMatthew Knepley {
15762b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15772b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1578d2840e09SBarry Smith   const PetscScalar *x,*v;
1579d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580dfbe8321SBarry Smith   PetscErrorCode    ierr;
1581d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1582d2840e09SBarry Smith   PetscInt          n,i;
15832b692628SMatthew Knepley 
15842b692628SMatthew Knepley   PetscFunctionBegin;
15852b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15882b692628SMatthew Knepley   for (i=0; i<m; i++) {
15892b692628SMatthew Knepley     idx    = a->j + a->i[i];
15902b692628SMatthew Knepley     v      = a->a + a->i[i];
15912b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15922b692628SMatthew Knepley     alpha1 = x[9*i];
15932b692628SMatthew Knepley     alpha2 = x[9*i+1];
15942b692628SMatthew Knepley     alpha3 = x[9*i+2];
15952b692628SMatthew Knepley     alpha4 = x[9*i+3];
15962b692628SMatthew Knepley     alpha5 = x[9*i+4];
15972b692628SMatthew Knepley     alpha6 = x[9*i+5];
15982b692628SMatthew Knepley     alpha7 = x[9*i+6];
15992b692628SMatthew Knepley     alpha8 = x[9*i+7];
16002b692628SMatthew Knepley     alpha9 = x[9*i+8];
16012b692628SMatthew Knepley     while (n-->0) {
16022b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16032b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16042b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16052b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16062b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16072b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16082b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16092b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16102b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16112b692628SMatthew Knepley       idx++; v++;
16122b692628SMatthew Knepley     }
16132b692628SMatthew Knepley   }
1614dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16172b692628SMatthew Knepley   PetscFunctionReturn(0);
16182b692628SMatthew Knepley }
1619b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620b9cda22cSBarry Smith {
1621b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1622b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1623d2840e09SBarry Smith   const PetscScalar *x,*v;
1624d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625b9cda22cSBarry Smith   PetscErrorCode    ierr;
1626d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1627d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1628b9cda22cSBarry Smith 
1629b9cda22cSBarry Smith   PetscFunctionBegin;
16303649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1631b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1632b9cda22cSBarry Smith   idx  = a->j;
1633b9cda22cSBarry Smith   v    = a->a;
1634b9cda22cSBarry Smith   ii   = a->i;
1635b9cda22cSBarry Smith 
1636b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1637b9cda22cSBarry Smith     jrow  = ii[i];
1638b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1639b9cda22cSBarry Smith     sum1  = 0.0;
1640b9cda22cSBarry Smith     sum2  = 0.0;
1641b9cda22cSBarry Smith     sum3  = 0.0;
1642b9cda22cSBarry Smith     sum4  = 0.0;
1643b9cda22cSBarry Smith     sum5  = 0.0;
1644b9cda22cSBarry Smith     sum6  = 0.0;
1645b9cda22cSBarry Smith     sum7  = 0.0;
1646b9cda22cSBarry Smith     sum8  = 0.0;
1647b9cda22cSBarry Smith     sum9  = 0.0;
1648b9cda22cSBarry Smith     sum10 = 0.0;
164926fbe8dcSKarl Rupp 
1650b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1651b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1652b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1653b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1654b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1655b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1656b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1657b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1658b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1659b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1660b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1661b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1662b9cda22cSBarry Smith       jrow++;
1663b9cda22cSBarry Smith     }
1664b9cda22cSBarry Smith     y[10*i]   = sum1;
1665b9cda22cSBarry Smith     y[10*i+1] = sum2;
1666b9cda22cSBarry Smith     y[10*i+2] = sum3;
1667b9cda22cSBarry Smith     y[10*i+3] = sum4;
1668b9cda22cSBarry Smith     y[10*i+4] = sum5;
1669b9cda22cSBarry Smith     y[10*i+5] = sum6;
1670b9cda22cSBarry Smith     y[10*i+6] = sum7;
1671b9cda22cSBarry Smith     y[10*i+7] = sum8;
1672b9cda22cSBarry Smith     y[10*i+8] = sum9;
1673b9cda22cSBarry Smith     y[10*i+9] = sum10;
1674b9cda22cSBarry Smith   }
1675b9cda22cSBarry Smith 
1676dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
16773649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1678b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1679b9cda22cSBarry Smith   PetscFunctionReturn(0);
1680b9cda22cSBarry Smith }
1681b9cda22cSBarry Smith 
1682b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683b9cda22cSBarry Smith {
1684b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1685b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1686d2840e09SBarry Smith   const PetscScalar *x,*v;
1687d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688b9cda22cSBarry Smith   PetscErrorCode    ierr;
1689d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1690b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1691b9cda22cSBarry Smith 
1692b9cda22cSBarry Smith   PetscFunctionBegin;
1693b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1695b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1696b9cda22cSBarry Smith   idx  = a->j;
1697b9cda22cSBarry Smith   v    = a->a;
1698b9cda22cSBarry Smith   ii   = a->i;
1699b9cda22cSBarry Smith 
1700b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1701b9cda22cSBarry Smith     jrow  = ii[i];
1702b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1703b9cda22cSBarry Smith     sum1  = 0.0;
1704b9cda22cSBarry Smith     sum2  = 0.0;
1705b9cda22cSBarry Smith     sum3  = 0.0;
1706b9cda22cSBarry Smith     sum4  = 0.0;
1707b9cda22cSBarry Smith     sum5  = 0.0;
1708b9cda22cSBarry Smith     sum6  = 0.0;
1709b9cda22cSBarry Smith     sum7  = 0.0;
1710b9cda22cSBarry Smith     sum8  = 0.0;
1711b9cda22cSBarry Smith     sum9  = 0.0;
1712b9cda22cSBarry Smith     sum10 = 0.0;
1713b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1714b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1715b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1716b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1717b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1718b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1719b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1720b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1721b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1722b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1723b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1724b9cda22cSBarry Smith       jrow++;
1725b9cda22cSBarry Smith     }
1726b9cda22cSBarry Smith     y[10*i]   += sum1;
1727b9cda22cSBarry Smith     y[10*i+1] += sum2;
1728b9cda22cSBarry Smith     y[10*i+2] += sum3;
1729b9cda22cSBarry Smith     y[10*i+3] += sum4;
1730b9cda22cSBarry Smith     y[10*i+4] += sum5;
1731b9cda22cSBarry Smith     y[10*i+5] += sum6;
1732b9cda22cSBarry Smith     y[10*i+6] += sum7;
1733b9cda22cSBarry Smith     y[10*i+7] += sum8;
1734b9cda22cSBarry Smith     y[10*i+8] += sum9;
1735b9cda22cSBarry Smith     y[10*i+9] += sum10;
1736b9cda22cSBarry Smith   }
1737b9cda22cSBarry Smith 
1738dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1740b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1741b9cda22cSBarry Smith   PetscFunctionReturn(0);
1742b9cda22cSBarry Smith }
1743b9cda22cSBarry Smith 
1744b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745b9cda22cSBarry Smith {
1746b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1747b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1748d2840e09SBarry Smith   const PetscScalar *x,*v;
1749d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750b9cda22cSBarry Smith   PetscErrorCode    ierr;
1751d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1752d2840e09SBarry Smith   PetscInt          n,i;
1753b9cda22cSBarry Smith 
1754b9cda22cSBarry Smith   PetscFunctionBegin;
1755d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
17563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1757b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1758b9cda22cSBarry Smith 
1759b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1760b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1761b9cda22cSBarry Smith     v       = a->a + a->i[i];
1762b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1763e29fdc22SBarry Smith     alpha1  = x[10*i];
1764e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1765e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1766e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1767e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1768e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1769e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1770e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1771e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1772b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1773b9cda22cSBarry Smith     while (n-->0) {
1774e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1775e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1776e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1777e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1778e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1779e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1780e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1781e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1782e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1783b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1784b9cda22cSBarry Smith       idx++; v++;
1785b9cda22cSBarry Smith     }
1786b9cda22cSBarry Smith   }
1787dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1789b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1790b9cda22cSBarry Smith   PetscFunctionReturn(0);
1791b9cda22cSBarry Smith }
1792b9cda22cSBarry Smith 
1793b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794b9cda22cSBarry Smith {
1795b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1796b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1797d2840e09SBarry Smith   const PetscScalar *x,*v;
1798d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799b9cda22cSBarry Smith   PetscErrorCode    ierr;
1800d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1801d2840e09SBarry Smith   PetscInt          n,i;
1802b9cda22cSBarry Smith 
1803b9cda22cSBarry Smith   PetscFunctionBegin;
1804b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18053649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1806b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1807b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1808b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1809b9cda22cSBarry Smith     v       = a->a + a->i[i];
1810b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1811b9cda22cSBarry Smith     alpha1  = x[10*i];
1812b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1813b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1814b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1815b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1816b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1817b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1818b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1819b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1820b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1821b9cda22cSBarry Smith     while (n-->0) {
1822b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1823b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1824b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1825b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1826b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1827b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1828b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1829b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1830b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1831b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1832b9cda22cSBarry Smith       idx++; v++;
1833b9cda22cSBarry Smith     }
1834b9cda22cSBarry Smith   }
1835dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18363649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1837b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1838b9cda22cSBarry Smith   PetscFunctionReturn(0);
1839b9cda22cSBarry Smith }
1840b9cda22cSBarry Smith 
18412f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1842dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1843dbdd7285SBarry Smith {
1844dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1845dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1846d2840e09SBarry Smith   const PetscScalar *x,*v;
1847d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1848dbdd7285SBarry Smith   PetscErrorCode    ierr;
1849d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1850d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1851dbdd7285SBarry Smith 
1852dbdd7285SBarry Smith   PetscFunctionBegin;
18533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1854dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1855dbdd7285SBarry Smith   idx  = a->j;
1856dbdd7285SBarry Smith   v    = a->a;
1857dbdd7285SBarry Smith   ii   = a->i;
1858dbdd7285SBarry Smith 
1859dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1860dbdd7285SBarry Smith     jrow  = ii[i];
1861dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1862dbdd7285SBarry Smith     sum1  = 0.0;
1863dbdd7285SBarry Smith     sum2  = 0.0;
1864dbdd7285SBarry Smith     sum3  = 0.0;
1865dbdd7285SBarry Smith     sum4  = 0.0;
1866dbdd7285SBarry Smith     sum5  = 0.0;
1867dbdd7285SBarry Smith     sum6  = 0.0;
1868dbdd7285SBarry Smith     sum7  = 0.0;
1869dbdd7285SBarry Smith     sum8  = 0.0;
1870dbdd7285SBarry Smith     sum9  = 0.0;
1871dbdd7285SBarry Smith     sum10 = 0.0;
1872dbdd7285SBarry Smith     sum11 = 0.0;
187326fbe8dcSKarl Rupp 
1874dbdd7285SBarry Smith     nonzerorow += (n>0);
1875dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1876dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1877dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1878dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1879dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1880dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1881dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1882dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1883dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1884dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1885dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1886dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1887dbdd7285SBarry Smith       jrow++;
1888dbdd7285SBarry Smith     }
1889dbdd7285SBarry Smith     y[11*i]    = sum1;
1890dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1891dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1892dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1893dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1894dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1895dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1896dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1897dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1898dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1899dbdd7285SBarry Smith     y[11*i+10] = sum11;
1900dbdd7285SBarry Smith   }
1901dbdd7285SBarry Smith 
1902ca0c957dSBarry Smith   ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1904dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1905dbdd7285SBarry Smith   PetscFunctionReturn(0);
1906dbdd7285SBarry Smith }
1907dbdd7285SBarry Smith 
1908dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1909dbdd7285SBarry Smith {
1910dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1911dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1912d2840e09SBarry Smith   const PetscScalar *x,*v;
1913d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1914dbdd7285SBarry Smith   PetscErrorCode    ierr;
1915d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1916dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1917dbdd7285SBarry Smith 
1918dbdd7285SBarry Smith   PetscFunctionBegin;
1919dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1921dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1922dbdd7285SBarry Smith   idx  = a->j;
1923dbdd7285SBarry Smith   v    = a->a;
1924dbdd7285SBarry Smith   ii   = a->i;
1925dbdd7285SBarry Smith 
1926dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1927dbdd7285SBarry Smith     jrow  = ii[i];
1928dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1929dbdd7285SBarry Smith     sum1  = 0.0;
1930dbdd7285SBarry Smith     sum2  = 0.0;
1931dbdd7285SBarry Smith     sum3  = 0.0;
1932dbdd7285SBarry Smith     sum4  = 0.0;
1933dbdd7285SBarry Smith     sum5  = 0.0;
1934dbdd7285SBarry Smith     sum6  = 0.0;
1935dbdd7285SBarry Smith     sum7  = 0.0;
1936dbdd7285SBarry Smith     sum8  = 0.0;
1937dbdd7285SBarry Smith     sum9  = 0.0;
1938dbdd7285SBarry Smith     sum10 = 0.0;
1939dbdd7285SBarry Smith     sum11 = 0.0;
1940dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1941dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1942dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1943dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1944dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1945dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1946dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1947dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1948dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1949dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1950dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1951dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1952dbdd7285SBarry Smith       jrow++;
1953dbdd7285SBarry Smith     }
1954dbdd7285SBarry Smith     y[11*i]    += sum1;
1955dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1956dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1957dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1958dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1959dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1960dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1961dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1962dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1963dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1964dbdd7285SBarry Smith     y[11*i+10] += sum11;
1965dbdd7285SBarry Smith   }
1966dbdd7285SBarry Smith 
1967ca0c957dSBarry Smith   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
19683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1969dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1970dbdd7285SBarry Smith   PetscFunctionReturn(0);
1971dbdd7285SBarry Smith }
1972dbdd7285SBarry Smith 
1973dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1974dbdd7285SBarry Smith {
1975dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1976dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1977d2840e09SBarry Smith   const PetscScalar *x,*v;
1978d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1979dbdd7285SBarry Smith   PetscErrorCode    ierr;
1980d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1981d2840e09SBarry Smith   PetscInt          n,i;
1982dbdd7285SBarry Smith 
1983dbdd7285SBarry Smith   PetscFunctionBegin;
1984d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
19853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1986dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1987dbdd7285SBarry Smith 
1988dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1989dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1990dbdd7285SBarry Smith     v       = a->a + a->i[i];
1991dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1992dbdd7285SBarry Smith     alpha1  = x[11*i];
1993dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1994dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1995dbdd7285SBarry Smith     alpha4  = x[11*i+3];
1996dbdd7285SBarry Smith     alpha5  = x[11*i+4];
1997dbdd7285SBarry Smith     alpha6  = x[11*i+5];
1998dbdd7285SBarry Smith     alpha7  = x[11*i+6];
1999dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2000dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2001dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2002dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2003dbdd7285SBarry Smith     while (n-->0) {
2004dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2005dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2006dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2007dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2008dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2009dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2010dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2015dbdd7285SBarry Smith       idx++; v++;
2016dbdd7285SBarry Smith     }
2017dbdd7285SBarry Smith   }
2018ca0c957dSBarry Smith   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
20193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2020dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2021dbdd7285SBarry Smith   PetscFunctionReturn(0);
2022dbdd7285SBarry Smith }
2023dbdd7285SBarry Smith 
2024dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2025dbdd7285SBarry Smith {
2026dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2027dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2028d2840e09SBarry Smith   const PetscScalar *x,*v;
2029d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2030dbdd7285SBarry Smith   PetscErrorCode    ierr;
2031d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2032d2840e09SBarry Smith   PetscInt          n,i;
2033dbdd7285SBarry Smith 
2034dbdd7285SBarry Smith   PetscFunctionBegin;
2035dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2037dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2038dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2039dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2040dbdd7285SBarry Smith     v       = a->a + a->i[i];
2041dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2042dbdd7285SBarry Smith     alpha1  = x[11*i];
2043dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2044dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2045dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2046dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2047dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2048dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2049dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2050dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2051dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2052dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2053dbdd7285SBarry Smith     while (n-->0) {
2054dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2055dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2056dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2057dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2058dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2059dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2060dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2061dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2062dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2063dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2064dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2065dbdd7285SBarry Smith       idx++; v++;
2066dbdd7285SBarry Smith     }
2067dbdd7285SBarry Smith   }
2068ca0c957dSBarry Smith   ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr);
20693649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2070dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2071dbdd7285SBarry Smith   PetscFunctionReturn(0);
2072dbdd7285SBarry Smith }
2073dbdd7285SBarry Smith 
2074dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2075dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20762f7816d4SBarry Smith {
20772f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20782f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2079d2840e09SBarry Smith   const PetscScalar *x,*v;
2080d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20812f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2082dfbe8321SBarry Smith   PetscErrorCode    ierr;
2083d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2084d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20852f7816d4SBarry Smith 
20862f7816d4SBarry Smith   PetscFunctionBegin;
20873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
20881ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
20892f7816d4SBarry Smith   idx  = a->j;
20902f7816d4SBarry Smith   v    = a->a;
20912f7816d4SBarry Smith   ii   = a->i;
20922f7816d4SBarry Smith 
20932f7816d4SBarry Smith   for (i=0; i<m; i++) {
20942f7816d4SBarry Smith     jrow  = ii[i];
20952f7816d4SBarry Smith     n     = ii[i+1] - jrow;
20962f7816d4SBarry Smith     sum1  = 0.0;
20972f7816d4SBarry Smith     sum2  = 0.0;
20982f7816d4SBarry Smith     sum3  = 0.0;
20992f7816d4SBarry Smith     sum4  = 0.0;
21002f7816d4SBarry Smith     sum5  = 0.0;
21012f7816d4SBarry Smith     sum6  = 0.0;
21022f7816d4SBarry Smith     sum7  = 0.0;
21032f7816d4SBarry Smith     sum8  = 0.0;
21042f7816d4SBarry Smith     sum9  = 0.0;
21052f7816d4SBarry Smith     sum10 = 0.0;
21062f7816d4SBarry Smith     sum11 = 0.0;
21072f7816d4SBarry Smith     sum12 = 0.0;
21082f7816d4SBarry Smith     sum13 = 0.0;
21092f7816d4SBarry Smith     sum14 = 0.0;
21102f7816d4SBarry Smith     sum15 = 0.0;
21112f7816d4SBarry Smith     sum16 = 0.0;
211226fbe8dcSKarl Rupp 
2113b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21142f7816d4SBarry Smith     for (j=0; j<n; j++) {
21152f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21162f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21172f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21182f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21192f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21202f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21212f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21222f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21232f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21242f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21252f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21262f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21272f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21282f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21292f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21302f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21312f7816d4SBarry Smith       jrow++;
21322f7816d4SBarry Smith     }
21332f7816d4SBarry Smith     y[16*i]    = sum1;
21342f7816d4SBarry Smith     y[16*i+1]  = sum2;
21352f7816d4SBarry Smith     y[16*i+2]  = sum3;
21362f7816d4SBarry Smith     y[16*i+3]  = sum4;
21372f7816d4SBarry Smith     y[16*i+4]  = sum5;
21382f7816d4SBarry Smith     y[16*i+5]  = sum6;
21392f7816d4SBarry Smith     y[16*i+6]  = sum7;
21402f7816d4SBarry Smith     y[16*i+7]  = sum8;
21412f7816d4SBarry Smith     y[16*i+8]  = sum9;
21422f7816d4SBarry Smith     y[16*i+9]  = sum10;
21432f7816d4SBarry Smith     y[16*i+10] = sum11;
21442f7816d4SBarry Smith     y[16*i+11] = sum12;
21452f7816d4SBarry Smith     y[16*i+12] = sum13;
21462f7816d4SBarry Smith     y[16*i+13] = sum14;
21472f7816d4SBarry Smith     y[16*i+14] = sum15;
21482f7816d4SBarry Smith     y[16*i+15] = sum16;
21492f7816d4SBarry Smith   }
21502f7816d4SBarry Smith 
2151dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
21523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
21531ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21542f7816d4SBarry Smith   PetscFunctionReturn(0);
21552f7816d4SBarry Smith }
21562f7816d4SBarry Smith 
2157dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21582f7816d4SBarry Smith {
21592f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21602f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2161d2840e09SBarry Smith   const PetscScalar *x,*v;
2162d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21632f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2164dfbe8321SBarry Smith   PetscErrorCode    ierr;
2165d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2166d2840e09SBarry Smith   PetscInt          n,i;
21672f7816d4SBarry Smith 
21682f7816d4SBarry Smith   PetscFunctionBegin;
2169d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
21703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21711ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2172bfec09a0SHong Zhang 
21732f7816d4SBarry Smith   for (i=0; i<m; i++) {
2174bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2175bfec09a0SHong Zhang     v       = a->a + a->i[i];
21762f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21772f7816d4SBarry Smith     alpha1  = x[16*i];
21782f7816d4SBarry Smith     alpha2  = x[16*i+1];
21792f7816d4SBarry Smith     alpha3  = x[16*i+2];
21802f7816d4SBarry Smith     alpha4  = x[16*i+3];
21812f7816d4SBarry Smith     alpha5  = x[16*i+4];
21822f7816d4SBarry Smith     alpha6  = x[16*i+5];
21832f7816d4SBarry Smith     alpha7  = x[16*i+6];
21842f7816d4SBarry Smith     alpha8  = x[16*i+7];
21852f7816d4SBarry Smith     alpha9  = x[16*i+8];
21862f7816d4SBarry Smith     alpha10 = x[16*i+9];
21872f7816d4SBarry Smith     alpha11 = x[16*i+10];
21882f7816d4SBarry Smith     alpha12 = x[16*i+11];
21892f7816d4SBarry Smith     alpha13 = x[16*i+12];
21902f7816d4SBarry Smith     alpha14 = x[16*i+13];
21912f7816d4SBarry Smith     alpha15 = x[16*i+14];
21922f7816d4SBarry Smith     alpha16 = x[16*i+15];
21932f7816d4SBarry Smith     while (n-->0) {
21942f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
21952f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
21962f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
21972f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
21982f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
21992f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22002f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22012f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22022f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22032f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22042f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22052f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22062f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22072f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22082f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22092f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22102f7816d4SBarry Smith       idx++; v++;
22112f7816d4SBarry Smith     }
22122f7816d4SBarry Smith   }
2213dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22151ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22162f7816d4SBarry Smith   PetscFunctionReturn(0);
22172f7816d4SBarry Smith }
22182f7816d4SBarry Smith 
2219dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22202f7816d4SBarry Smith {
22212f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22222f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2223d2840e09SBarry Smith   const PetscScalar *x,*v;
2224d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22252f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2226dfbe8321SBarry Smith   PetscErrorCode    ierr;
2227d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2228b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
22292f7816d4SBarry Smith 
22302f7816d4SBarry Smith   PetscFunctionBegin;
22312f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
22323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
22342f7816d4SBarry Smith   idx  = a->j;
22352f7816d4SBarry Smith   v    = a->a;
22362f7816d4SBarry Smith   ii   = a->i;
22372f7816d4SBarry Smith 
22382f7816d4SBarry Smith   for (i=0; i<m; i++) {
22392f7816d4SBarry Smith     jrow  = ii[i];
22402f7816d4SBarry Smith     n     = ii[i+1] - jrow;
22412f7816d4SBarry Smith     sum1  = 0.0;
22422f7816d4SBarry Smith     sum2  = 0.0;
22432f7816d4SBarry Smith     sum3  = 0.0;
22442f7816d4SBarry Smith     sum4  = 0.0;
22452f7816d4SBarry Smith     sum5  = 0.0;
22462f7816d4SBarry Smith     sum6  = 0.0;
22472f7816d4SBarry Smith     sum7  = 0.0;
22482f7816d4SBarry Smith     sum8  = 0.0;
22492f7816d4SBarry Smith     sum9  = 0.0;
22502f7816d4SBarry Smith     sum10 = 0.0;
22512f7816d4SBarry Smith     sum11 = 0.0;
22522f7816d4SBarry Smith     sum12 = 0.0;
22532f7816d4SBarry Smith     sum13 = 0.0;
22542f7816d4SBarry Smith     sum14 = 0.0;
22552f7816d4SBarry Smith     sum15 = 0.0;
22562f7816d4SBarry Smith     sum16 = 0.0;
22572f7816d4SBarry Smith     for (j=0; j<n; j++) {
22582f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22592f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22602f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22612f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22622f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22632f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22642f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22652f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22662f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22672f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22682f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22692f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22702f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22712f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22722f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22732f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22742f7816d4SBarry Smith       jrow++;
22752f7816d4SBarry Smith     }
22762f7816d4SBarry Smith     y[16*i]    += sum1;
22772f7816d4SBarry Smith     y[16*i+1]  += sum2;
22782f7816d4SBarry Smith     y[16*i+2]  += sum3;
22792f7816d4SBarry Smith     y[16*i+3]  += sum4;
22802f7816d4SBarry Smith     y[16*i+4]  += sum5;
22812f7816d4SBarry Smith     y[16*i+5]  += sum6;
22822f7816d4SBarry Smith     y[16*i+6]  += sum7;
22832f7816d4SBarry Smith     y[16*i+7]  += sum8;
22842f7816d4SBarry Smith     y[16*i+8]  += sum9;
22852f7816d4SBarry Smith     y[16*i+9]  += sum10;
22862f7816d4SBarry Smith     y[16*i+10] += sum11;
22872f7816d4SBarry Smith     y[16*i+11] += sum12;
22882f7816d4SBarry Smith     y[16*i+12] += sum13;
22892f7816d4SBarry Smith     y[16*i+13] += sum14;
22902f7816d4SBarry Smith     y[16*i+14] += sum15;
22912f7816d4SBarry Smith     y[16*i+15] += sum16;
22922f7816d4SBarry Smith   }
22932f7816d4SBarry Smith 
2294dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22953649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22961ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
22972f7816d4SBarry Smith   PetscFunctionReturn(0);
22982f7816d4SBarry Smith }
22992f7816d4SBarry Smith 
2300dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23012f7816d4SBarry Smith {
23022f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23032f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304d2840e09SBarry Smith   const PetscScalar *x,*v;
2305d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23062f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2307dfbe8321SBarry Smith   PetscErrorCode    ierr;
2308d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2309d2840e09SBarry Smith   PetscInt          n,i;
23102f7816d4SBarry Smith 
23112f7816d4SBarry Smith   PetscFunctionBegin;
23122f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23141ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23152f7816d4SBarry Smith   for (i=0; i<m; i++) {
2316bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2317bfec09a0SHong Zhang     v       = a->a + a->i[i];
23182f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
23192f7816d4SBarry Smith     alpha1  = x[16*i];
23202f7816d4SBarry Smith     alpha2  = x[16*i+1];
23212f7816d4SBarry Smith     alpha3  = x[16*i+2];
23222f7816d4SBarry Smith     alpha4  = x[16*i+3];
23232f7816d4SBarry Smith     alpha5  = x[16*i+4];
23242f7816d4SBarry Smith     alpha6  = x[16*i+5];
23252f7816d4SBarry Smith     alpha7  = x[16*i+6];
23262f7816d4SBarry Smith     alpha8  = x[16*i+7];
23272f7816d4SBarry Smith     alpha9  = x[16*i+8];
23282f7816d4SBarry Smith     alpha10 = x[16*i+9];
23292f7816d4SBarry Smith     alpha11 = x[16*i+10];
23302f7816d4SBarry Smith     alpha12 = x[16*i+11];
23312f7816d4SBarry Smith     alpha13 = x[16*i+12];
23322f7816d4SBarry Smith     alpha14 = x[16*i+13];
23332f7816d4SBarry Smith     alpha15 = x[16*i+14];
23342f7816d4SBarry Smith     alpha16 = x[16*i+15];
23352f7816d4SBarry Smith     while (n-->0) {
23362f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
23372f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
23382f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
23392f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
23402f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23412f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23422f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23432f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23442f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23452f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23462f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23472f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23482f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23492f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23502f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23512f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23522f7816d4SBarry Smith       idx++; v++;
23532f7816d4SBarry Smith     }
23542f7816d4SBarry Smith   }
2355dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23571ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
235866ed3db0SBarry Smith   PetscFunctionReturn(0);
235966ed3db0SBarry Smith }
236066ed3db0SBarry Smith 
2361ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2362ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2363ed1c418cSBarry Smith {
2364ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2365ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2366d2840e09SBarry Smith   const PetscScalar *x,*v;
2367d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2368ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2369ed1c418cSBarry Smith   PetscErrorCode    ierr;
2370d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2371d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2372ed1c418cSBarry Smith 
2373ed1c418cSBarry Smith   PetscFunctionBegin;
23743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2375ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2376ed1c418cSBarry Smith   idx  = a->j;
2377ed1c418cSBarry Smith   v    = a->a;
2378ed1c418cSBarry Smith   ii   = a->i;
2379ed1c418cSBarry Smith 
2380ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2381ed1c418cSBarry Smith     jrow  = ii[i];
2382ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2383ed1c418cSBarry Smith     sum1  = 0.0;
2384ed1c418cSBarry Smith     sum2  = 0.0;
2385ed1c418cSBarry Smith     sum3  = 0.0;
2386ed1c418cSBarry Smith     sum4  = 0.0;
2387ed1c418cSBarry Smith     sum5  = 0.0;
2388ed1c418cSBarry Smith     sum6  = 0.0;
2389ed1c418cSBarry Smith     sum7  = 0.0;
2390ed1c418cSBarry Smith     sum8  = 0.0;
2391ed1c418cSBarry Smith     sum9  = 0.0;
2392ed1c418cSBarry Smith     sum10 = 0.0;
2393ed1c418cSBarry Smith     sum11 = 0.0;
2394ed1c418cSBarry Smith     sum12 = 0.0;
2395ed1c418cSBarry Smith     sum13 = 0.0;
2396ed1c418cSBarry Smith     sum14 = 0.0;
2397ed1c418cSBarry Smith     sum15 = 0.0;
2398ed1c418cSBarry Smith     sum16 = 0.0;
2399ed1c418cSBarry Smith     sum17 = 0.0;
2400ed1c418cSBarry Smith     sum18 = 0.0;
240126fbe8dcSKarl Rupp 
2402ed1c418cSBarry Smith     nonzerorow += (n>0);
2403ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2404ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2405ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2406ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2407ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2408ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2409ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2410ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2411ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2412ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2413ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2414ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2415ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2416ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2417ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2418ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2419ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2420ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2421ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2422ed1c418cSBarry Smith       jrow++;
2423ed1c418cSBarry Smith     }
2424ed1c418cSBarry Smith     y[18*i]    = sum1;
2425ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2426ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2427ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2428ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2429ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2430ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2431ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2432ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2433ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2434ed1c418cSBarry Smith     y[18*i+10] = sum11;
2435ed1c418cSBarry Smith     y[18*i+11] = sum12;
2436ed1c418cSBarry Smith     y[18*i+12] = sum13;
2437ed1c418cSBarry Smith     y[18*i+13] = sum14;
2438ed1c418cSBarry Smith     y[18*i+14] = sum15;
2439ed1c418cSBarry Smith     y[18*i+15] = sum16;
2440ed1c418cSBarry Smith     y[18*i+16] = sum17;
2441ed1c418cSBarry Smith     y[18*i+17] = sum18;
2442ed1c418cSBarry Smith   }
2443ed1c418cSBarry Smith 
2444dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
24453649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2446ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2447ed1c418cSBarry Smith   PetscFunctionReturn(0);
2448ed1c418cSBarry Smith }
2449ed1c418cSBarry Smith 
2450ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451ed1c418cSBarry Smith {
2452ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454d2840e09SBarry Smith   const PetscScalar *x,*v;
2455d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2456ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2457ed1c418cSBarry Smith   PetscErrorCode    ierr;
2458d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2459d2840e09SBarry Smith   PetscInt          n,i;
2460ed1c418cSBarry Smith 
2461ed1c418cSBarry Smith   PetscFunctionBegin;
2462d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
24633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2464ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2465ed1c418cSBarry Smith 
2466ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2467ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2468ed1c418cSBarry Smith     v       = a->a + a->i[i];
2469ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2470ed1c418cSBarry Smith     alpha1  = x[18*i];
2471ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2472ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2473ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2474ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2475ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2476ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2477ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2478ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2479ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2480ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2481ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2482ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2483ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2484ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2485ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2486ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2487ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2488ed1c418cSBarry Smith     while (n-->0) {
2489ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2490ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2491ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2492ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2493ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2494ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2495ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2496ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2497ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2498ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2499ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2500ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2501ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2502ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2503ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2504ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2505ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2506ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2507ed1c418cSBarry Smith       idx++; v++;
2508ed1c418cSBarry Smith     }
2509ed1c418cSBarry Smith   }
2510dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25113649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2512ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2513ed1c418cSBarry Smith   PetscFunctionReturn(0);
2514ed1c418cSBarry Smith }
2515ed1c418cSBarry Smith 
2516ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2517ed1c418cSBarry Smith {
2518ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2519ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2520d2840e09SBarry Smith   const PetscScalar *x,*v;
2521d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2522ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2523ed1c418cSBarry Smith   PetscErrorCode    ierr;
2524d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2525ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2526ed1c418cSBarry Smith 
2527ed1c418cSBarry Smith   PetscFunctionBegin;
2528ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
25293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2530ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2531ed1c418cSBarry Smith   idx  = a->j;
2532ed1c418cSBarry Smith   v    = a->a;
2533ed1c418cSBarry Smith   ii   = a->i;
2534ed1c418cSBarry Smith 
2535ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2536ed1c418cSBarry Smith     jrow  = ii[i];
2537ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2538ed1c418cSBarry Smith     sum1  = 0.0;
2539ed1c418cSBarry Smith     sum2  = 0.0;
2540ed1c418cSBarry Smith     sum3  = 0.0;
2541ed1c418cSBarry Smith     sum4  = 0.0;
2542ed1c418cSBarry Smith     sum5  = 0.0;
2543ed1c418cSBarry Smith     sum6  = 0.0;
2544ed1c418cSBarry Smith     sum7  = 0.0;
2545ed1c418cSBarry Smith     sum8  = 0.0;
2546ed1c418cSBarry Smith     sum9  = 0.0;
2547ed1c418cSBarry Smith     sum10 = 0.0;
2548ed1c418cSBarry Smith     sum11 = 0.0;
2549ed1c418cSBarry Smith     sum12 = 0.0;
2550ed1c418cSBarry Smith     sum13 = 0.0;
2551ed1c418cSBarry Smith     sum14 = 0.0;
2552ed1c418cSBarry Smith     sum15 = 0.0;
2553ed1c418cSBarry Smith     sum16 = 0.0;
2554ed1c418cSBarry Smith     sum17 = 0.0;
2555ed1c418cSBarry Smith     sum18 = 0.0;
2556ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2557ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2558ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2559ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2560ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2561ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2562ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2563ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2564ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2565ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2566ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2567ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2568ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2569ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2570ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2571ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2572ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2573ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2574ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2575ed1c418cSBarry Smith       jrow++;
2576ed1c418cSBarry Smith     }
2577ed1c418cSBarry Smith     y[18*i]    += sum1;
2578ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2579ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2580ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2581ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2582ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2583ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2584ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2585ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2586ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2587ed1c418cSBarry Smith     y[18*i+10] += sum11;
2588ed1c418cSBarry Smith     y[18*i+11] += sum12;
2589ed1c418cSBarry Smith     y[18*i+12] += sum13;
2590ed1c418cSBarry Smith     y[18*i+13] += sum14;
2591ed1c418cSBarry Smith     y[18*i+14] += sum15;
2592ed1c418cSBarry Smith     y[18*i+15] += sum16;
2593ed1c418cSBarry Smith     y[18*i+16] += sum17;
2594ed1c418cSBarry Smith     y[18*i+17] += sum18;
2595ed1c418cSBarry Smith   }
2596ed1c418cSBarry Smith 
2597dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25983649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2599ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2600ed1c418cSBarry Smith   PetscFunctionReturn(0);
2601ed1c418cSBarry Smith }
2602ed1c418cSBarry Smith 
2603ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2604ed1c418cSBarry Smith {
2605ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2606ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2607d2840e09SBarry Smith   const PetscScalar *x,*v;
2608d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2609ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2610ed1c418cSBarry Smith   PetscErrorCode    ierr;
2611d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2612d2840e09SBarry Smith   PetscInt          n,i;
2613ed1c418cSBarry Smith 
2614ed1c418cSBarry Smith   PetscFunctionBegin;
2615ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2617ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2618ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2619ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2620ed1c418cSBarry Smith     v       = a->a + a->i[i];
2621ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2622ed1c418cSBarry Smith     alpha1  = x[18*i];
2623ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2624ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2625ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2626ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2627ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2628ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2629ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2630ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2631ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2632ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2633ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2634ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2635ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2636ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2637ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2638ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2639ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2640ed1c418cSBarry Smith     while (n-->0) {
2641ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2642ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2643ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2644ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2645ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2646ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2647ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2648ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2649ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2650ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2651ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2652ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2653ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2654ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2655ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2656ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2657ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2658ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2659ed1c418cSBarry Smith       idx++; v++;
2660ed1c418cSBarry Smith     }
2661ed1c418cSBarry Smith   }
2662dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2664ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2665ed1c418cSBarry Smith   PetscFunctionReturn(0);
2666ed1c418cSBarry Smith }
2667ed1c418cSBarry Smith 
26686861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26696861f193SBarry Smith {
26706861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26716861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26726861f193SBarry Smith   const PetscScalar *x,*v;
26736861f193SBarry Smith   PetscScalar       *y,*sums;
26746861f193SBarry Smith   PetscErrorCode    ierr;
26756861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26766861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26776861f193SBarry Smith 
26786861f193SBarry Smith   PetscFunctionBegin;
26793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
26806861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
26816861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
26826861f193SBarry Smith   idx  = a->j;
26836861f193SBarry Smith   v    = a->a;
26846861f193SBarry Smith   ii   = a->i;
26856861f193SBarry Smith 
26866861f193SBarry Smith   for (i=0; i<m; i++) {
26876861f193SBarry Smith     jrow = ii[i];
26886861f193SBarry Smith     n    = ii[i+1] - jrow;
26896861f193SBarry Smith     sums = y + dof*i;
26906861f193SBarry Smith     for (j=0; j<n; j++) {
26916861f193SBarry Smith       for (k=0; k<dof; k++) {
26926861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26936861f193SBarry Smith       }
26946861f193SBarry Smith       jrow++;
26956861f193SBarry Smith     }
26966861f193SBarry Smith   }
26976861f193SBarry Smith 
26986861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
26993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27006861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27016861f193SBarry Smith   PetscFunctionReturn(0);
27026861f193SBarry Smith }
27036861f193SBarry Smith 
27046861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27056861f193SBarry Smith {
27066861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27076861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27086861f193SBarry Smith   const PetscScalar *x,*v;
27096861f193SBarry Smith   PetscScalar       *y,*sums;
27106861f193SBarry Smith   PetscErrorCode    ierr;
27116861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27126861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27136861f193SBarry Smith 
27146861f193SBarry Smith   PetscFunctionBegin;
27156861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27176861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27186861f193SBarry Smith   idx  = a->j;
27196861f193SBarry Smith   v    = a->a;
27206861f193SBarry Smith   ii   = a->i;
27216861f193SBarry Smith 
27226861f193SBarry Smith   for (i=0; i<m; i++) {
27236861f193SBarry Smith     jrow = ii[i];
27246861f193SBarry Smith     n    = ii[i+1] - jrow;
27256861f193SBarry Smith     sums = y + dof*i;
27266861f193SBarry Smith     for (j=0; j<n; j++) {
27276861f193SBarry Smith       for (k=0; k<dof; k++) {
27286861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27296861f193SBarry Smith       }
27306861f193SBarry Smith       jrow++;
27316861f193SBarry Smith     }
27326861f193SBarry Smith   }
27336861f193SBarry Smith 
27346861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27366861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
27376861f193SBarry Smith   PetscFunctionReturn(0);
27386861f193SBarry Smith }
27396861f193SBarry Smith 
27406861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27416861f193SBarry Smith {
27426861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27436861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27446861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27456861f193SBarry Smith   PetscScalar       *y;
27466861f193SBarry Smith   PetscErrorCode    ierr;
27476861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27486861f193SBarry Smith   PetscInt          n,i,k;
27496861f193SBarry Smith 
27506861f193SBarry Smith   PetscFunctionBegin;
27513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27526861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27536861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27546861f193SBarry Smith   for (i=0; i<m; i++) {
27556861f193SBarry Smith     idx   = a->j + a->i[i];
27566861f193SBarry Smith     v     = a->a + a->i[i];
27576861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27586861f193SBarry Smith     alpha = x + dof*i;
27596861f193SBarry Smith     while (n-->0) {
27606861f193SBarry Smith       for (k=0; k<dof; k++) {
27616861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27626861f193SBarry Smith       }
276383ce7366SBarry Smith       idx++; v++;
27646861f193SBarry Smith     }
27656861f193SBarry Smith   }
27666861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27686861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27696861f193SBarry Smith   PetscFunctionReturn(0);
27706861f193SBarry Smith }
27716861f193SBarry Smith 
27726861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27736861f193SBarry Smith {
27746861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27756861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27766861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27776861f193SBarry Smith   PetscScalar       *y;
27786861f193SBarry Smith   PetscErrorCode    ierr;
27796861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27806861f193SBarry Smith   PetscInt          n,i,k;
27816861f193SBarry Smith 
27826861f193SBarry Smith   PetscFunctionBegin;
27836861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27856861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27866861f193SBarry Smith   for (i=0; i<m; i++) {
27876861f193SBarry Smith     idx   = a->j + a->i[i];
27886861f193SBarry Smith     v     = a->a + a->i[i];
27896861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27906861f193SBarry Smith     alpha = x + dof*i;
27916861f193SBarry Smith     while (n-->0) {
27926861f193SBarry Smith       for (k=0; k<dof; k++) {
27936861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27946861f193SBarry Smith       }
279583ce7366SBarry Smith       idx++; v++;
27966861f193SBarry Smith     }
27976861f193SBarry Smith   }
27986861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28006861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28016861f193SBarry Smith   PetscFunctionReturn(0);
28026861f193SBarry Smith }
28036861f193SBarry Smith 
2804f4a53059SBarry Smith /*===================================================================================*/
2805dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2806f4a53059SBarry Smith {
28074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2808dfbe8321SBarry Smith   PetscErrorCode ierr;
2809f4a53059SBarry Smith 
2810b24ad042SBarry Smith   PetscFunctionBegin;
2811f4a53059SBarry Smith   /* start the scatter */
2812ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28134cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2814ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28154cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2816f4a53059SBarry Smith   PetscFunctionReturn(0);
2817f4a53059SBarry Smith }
2818f4a53059SBarry Smith 
2819dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
28204cb9d9b8SBarry Smith {
28214cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2822dfbe8321SBarry Smith   PetscErrorCode ierr;
2823b24ad042SBarry Smith 
28244cb9d9b8SBarry Smith   PetscFunctionBegin;
28254cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
28264cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2827ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2828ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28294cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28304cb9d9b8SBarry Smith }
28314cb9d9b8SBarry Smith 
2832dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28334cb9d9b8SBarry Smith {
28344cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2835dfbe8321SBarry Smith   PetscErrorCode ierr;
28364cb9d9b8SBarry Smith 
2837b24ad042SBarry Smith   PetscFunctionBegin;
28384cb9d9b8SBarry Smith   /* start the scatter */
2839ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2840d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2841ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
28434cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28444cb9d9b8SBarry Smith }
28454cb9d9b8SBarry Smith 
2846dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28474cb9d9b8SBarry Smith {
28484cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2849dfbe8321SBarry Smith   PetscErrorCode ierr;
2850b24ad042SBarry Smith 
28514cb9d9b8SBarry Smith   PetscFunctionBegin;
28524cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2853d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2854e4a140f6SJunchao Zhang   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2855ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28564cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28574cb9d9b8SBarry Smith }
28584cb9d9b8SBarry Smith 
285995ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28604222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
28614222ddf1SHong Zhang {
28624222ddf1SHong Zhang   Mat_Product    *product = C->product;
28634222ddf1SHong Zhang 
28644222ddf1SHong Zhang   PetscFunctionBegin;
28654222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
28664222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2867544a5e07SHong Zhang   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
28684222ddf1SHong Zhang   PetscFunctionReturn(0);
28694222ddf1SHong Zhang }
28704222ddf1SHong Zhang 
28714222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
28724222ddf1SHong Zhang {
28734222ddf1SHong Zhang   PetscErrorCode ierr;
28744222ddf1SHong Zhang   Mat_Product    *product = C->product;
28754222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
28764222ddf1SHong Zhang   Mat            A=product->A,P=product->B;
28774222ddf1SHong Zhang   PetscInt       alg=1; /* set default algorithm */
28784222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
28794222ddf1SHong Zhang   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
28804222ddf1SHong Zhang   PetscInt       nalg=4;
28814222ddf1SHong Zhang #else
28824222ddf1SHong Zhang   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
28834222ddf1SHong Zhang   PetscInt       nalg=5;
28844222ddf1SHong Zhang #endif
28854222ddf1SHong Zhang 
28864222ddf1SHong Zhang   PetscFunctionBegin;
2887544a5e07SHong Zhang   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
28884222ddf1SHong Zhang 
28894222ddf1SHong Zhang   /* PtAP */
28904222ddf1SHong Zhang   /* Check matrix local sizes */
28914222ddf1SHong Zhang   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
28924222ddf1SHong Zhang   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
28934222ddf1SHong Zhang 
28944222ddf1SHong Zhang   /* Set the default algorithm */
28954222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
28964222ddf1SHong Zhang   if (flg) {
28974222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
28984222ddf1SHong Zhang   }
28994222ddf1SHong Zhang 
29004222ddf1SHong Zhang   /* Get runtime option */
29014222ddf1SHong Zhang   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
29024222ddf1SHong Zhang   ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29034222ddf1SHong Zhang   if (flg) {
29044222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29054222ddf1SHong Zhang   }
29064222ddf1SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
29074222ddf1SHong Zhang 
29084222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
29094222ddf1SHong Zhang   if (flg) {
29104222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
29114222ddf1SHong Zhang     PetscFunctionReturn(0);
29124222ddf1SHong Zhang   }
29134222ddf1SHong Zhang 
29144222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
29154222ddf1SHong Zhang   if (flg) {
29164222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
29174222ddf1SHong Zhang     PetscFunctionReturn(0);
29184222ddf1SHong Zhang   }
29194222ddf1SHong Zhang 
29204222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
29216718818eSStefano Zampini   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr);
29224222ddf1SHong Zhang   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
29236718818eSStefano Zampini   ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
29244222ddf1SHong Zhang   PetscFunctionReturn(0);
29254222ddf1SHong Zhang }
29264222ddf1SHong Zhang 
29274222ddf1SHong Zhang /* ----------------------------------------------------------------*/
29284222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
29297ba1a0bfSKris Buschelman {
29307ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
29310298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
29327ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
29337ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
29347ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2935d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29367ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2937d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2938d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29397ba1a0bfSKris Buschelman   MatScalar          *ca;
2940d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29417ba1a0bfSKris Buschelman 
29427ba1a0bfSKris Buschelman   PetscFunctionBegin;
29437ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29447ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29457ba1a0bfSKris Buschelman 
29467ba1a0bfSKris Buschelman   cn = pn*ppdof;
29477ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29487ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
2949854ce69bSBarry Smith   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
29507ba1a0bfSKris Buschelman   ci[0] = 0;
29517ba1a0bfSKris Buschelman 
29527ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
2953dcca6d9dSJed Brown   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2954580bdb30SBarry Smith   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2955580bdb30SBarry Smith   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
29567ba1a0bfSKris Buschelman 
29577ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
29587ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
29597ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2960f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
29617ba1a0bfSKris Buschelman   current_space = free_space;
29627ba1a0bfSKris Buschelman 
29637ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29647ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29657ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29667ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29677ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29687ba1a0bfSKris Buschelman       ptanzi = 0;
29697ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29707ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29717ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29727ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29737ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29747ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29757ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29767ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29777ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29787ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29797ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29807ba1a0bfSKris Buschelman           }
29817ba1a0bfSKris Buschelman         }
29827ba1a0bfSKris Buschelman       }
29837ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29847ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29857ba1a0bfSKris Buschelman       cnzi = 0;
29867ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29877ba1a0bfSKris Buschelman         /* Get offset within block of P */
29887ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29897ba1a0bfSKris Buschelman         /* Get block row of P */
29907ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29917ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29927ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29937ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29947ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29957ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29967ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29977ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29987ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29997ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30007ba1a0bfSKris Buschelman           }
30017ba1a0bfSKris Buschelman         }
30027ba1a0bfSKris Buschelman       }
30037ba1a0bfSKris Buschelman 
30047ba1a0bfSKris Buschelman       /* sort sparserow */
30057ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30067ba1a0bfSKris Buschelman 
30077ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30087ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30097ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
3010f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
30117ba1a0bfSKris Buschelman       }
30127ba1a0bfSKris Buschelman 
30137ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
3014580bdb30SBarry Smith       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
301526fbe8dcSKarl Rupp 
30167ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30177ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30187ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30197ba1a0bfSKris Buschelman 
302026fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
302126fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
302226fbe8dcSKarl Rupp 
30237ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30247ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30257ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30267ba1a0bfSKris Buschelman     }
30277ba1a0bfSKris Buschelman   }
30287ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30297ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30307ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
3031854ce69bSBarry Smith   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3032a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
303374ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30347ba1a0bfSKris Buschelman 
30357ba1a0bfSKris Buschelman   /* Allocate space for ca */
3036854ce69bSBarry Smith   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
30377ba1a0bfSKris Buschelman 
30387ba1a0bfSKris Buschelman   /* put together the new matrix */
3039e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr);
30404222ddf1SHong Zhang   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
30417ba1a0bfSKris Buschelman 
30427ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30437ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30444222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
3045e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3046e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30477ba1a0bfSKris Buschelman   c->nonew   = 0;
304826fbe8dcSKarl Rupp 
30494222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
30504222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
30517ba1a0bfSKris Buschelman 
30527ba1a0bfSKris Buschelman   /* Clean up. */
30537ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30547ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
30557ba1a0bfSKris Buschelman }
30567ba1a0bfSKris Buschelman 
30577ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
30587ba1a0bfSKris Buschelman {
30597ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
30607ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
30617ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
30627ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
30637ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30647ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30657ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3066a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3067d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3068d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3069d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3070a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3071d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30727ba1a0bfSKris Buschelman 
30737ba1a0bfSKris Buschelman   PetscFunctionBegin;
30747ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30751795a4d1SJed Brown   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
30767ba1a0bfSKris Buschelman 
30777ba1a0bfSKris Buschelman   /* Clear old values in C */
3078580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
30797ba1a0bfSKris Buschelman 
30807ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30817ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30827ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30837ba1a0bfSKris Buschelman     apnzj = 0;
30847ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30857ba1a0bfSKris Buschelman       /* Get offset within block of P */
30867ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30877ba1a0bfSKris Buschelman       /* Get block row of P */
30887ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30897ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30907ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30917ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30927ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30937ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30947ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30957ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30967ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30977ba1a0bfSKris Buschelman         }
30987ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30997ba1a0bfSKris Buschelman       }
3100dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31017ba1a0bfSKris Buschelman       aa++;
31027ba1a0bfSKris Buschelman     }
31037ba1a0bfSKris Buschelman 
31047ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31057ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31067ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31077ba1a0bfSKris Buschelman 
31087ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31097ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31107ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31117ba1a0bfSKris Buschelman     poffset = pi[prow];
31127ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
31137ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31147ba1a0bfSKris Buschelman     pJ = pj+poffset;
31157ba1a0bfSKris Buschelman     pA = pa+poffset;
31167ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
31177ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
31187ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
31197ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
31207ba1a0bfSKris Buschelman       pJ++;
31217ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31227ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
312326fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
31247ba1a0bfSKris Buschelman       }
3125dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31267ba1a0bfSKris Buschelman       pA++;
31277ba1a0bfSKris Buschelman     }
31287ba1a0bfSKris Buschelman 
31297ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31307ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
31317ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31327ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31337ba1a0bfSKris Buschelman     }
31347ba1a0bfSKris Buschelman   }
31357ba1a0bfSKris Buschelman 
31367ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31377ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31387ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
313974ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
314095ddefa2SHong Zhang   PetscFunctionReturn(0);
314195ddefa2SHong Zhang }
31427ba1a0bfSKris Buschelman 
31434222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
31442121bac1SHong Zhang {
31452121bac1SHong Zhang   PetscErrorCode      ierr;
31464222ddf1SHong Zhang   Mat_Product         *product = C->product;
31474222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
31482121bac1SHong Zhang 
31492121bac1SHong Zhang   PetscFunctionBegin;
31504222ddf1SHong Zhang   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
31512121bac1SHong Zhang   PetscFunctionReturn(0);
31522121bac1SHong Zhang }
31532121bac1SHong Zhang 
3154f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
315595ddefa2SHong Zhang {
315695ddefa2SHong Zhang   PetscFunctionBegin;
31573e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
315895ddefa2SHong Zhang }
315995ddefa2SHong Zhang 
3160f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
316195ddefa2SHong Zhang {
316295ddefa2SHong Zhang   PetscFunctionBegin;
31633e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31647ba1a0bfSKris Buschelman }
31655c65b9ecSFande Kong 
3166bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3167bc8e477aSFande Kong 
3168bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3169bc8e477aSFande Kong {
3170bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3171bc8e477aSFande Kong   PetscErrorCode  ierr;
3172bc8e477aSFande Kong 
3173bc8e477aSFande Kong   PetscFunctionBegin;
317434bcad68SFande Kong 
3175bc8e477aSFande Kong   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3176bc8e477aSFande Kong   PetscFunctionReturn(0);
3177bc8e477aSFande Kong }
3178bc8e477aSFande Kong 
31794222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3180bc8e477aSFande Kong 
31814222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3182bc8e477aSFande Kong {
3183bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3184bc8e477aSFande Kong   PetscErrorCode  ierr;
3185bc8e477aSFande Kong 
3186bc8e477aSFande Kong   PetscFunctionBegin;
3187bc8e477aSFande Kong   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
31884222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3189bc8e477aSFande Kong   PetscFunctionReturn(0);
3190bc8e477aSFande Kong }
3191bc8e477aSFande Kong 
3192bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3193bc8e477aSFande Kong 
3194bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3195bc8e477aSFande Kong {
3196bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3197bc8e477aSFande Kong   PetscErrorCode  ierr;
3198bc8e477aSFande Kong 
3199bc8e477aSFande Kong   PetscFunctionBegin;
320034bcad68SFande Kong 
3201bc8e477aSFande Kong   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3202bc8e477aSFande Kong   PetscFunctionReturn(0);
3203bc8e477aSFande Kong }
3204bc8e477aSFande Kong 
32054222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3206bc8e477aSFande Kong 
32074222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3208bc8e477aSFande Kong {
3209bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3210bc8e477aSFande Kong   PetscErrorCode  ierr;
3211bc8e477aSFande Kong 
3212bc8e477aSFande Kong   PetscFunctionBegin;
321334bcad68SFande Kong 
3214bc8e477aSFande Kong   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
32154222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3216bc8e477aSFande Kong   PetscFunctionReturn(0);
3217bc8e477aSFande Kong }
3218bc8e477aSFande Kong 
32194222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3220bc8e477aSFande Kong {
3221bc8e477aSFande Kong   PetscErrorCode      ierr;
32224222ddf1SHong Zhang   Mat_Product         *product = C->product;
32234222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
32244222ddf1SHong Zhang   PetscBool           flg;
3225bc8e477aSFande Kong 
3226bc8e477aSFande Kong   PetscFunctionBegin;
32274222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
32284222ddf1SHong Zhang   if (flg) {
32294222ddf1SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
32304222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
32314222ddf1SHong Zhang     PetscFunctionReturn(0);
3232bc8e477aSFande Kong   }
323334bcad68SFande Kong 
32344222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
32354222ddf1SHong Zhang   if (flg) {
32364222ddf1SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
32374222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
32384222ddf1SHong Zhang     PetscFunctionReturn(0);
32394222ddf1SHong Zhang   }
324034bcad68SFande Kong 
32414222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3242bc8e477aSFande Kong }
3243bc8e477aSFande Kong 
3244cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32459c4fc4c7SBarry Smith {
32469c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3247ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
32489c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
32499c4fc4c7SBarry Smith   PetscErrorCode ierr;
32500fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3251ba8c8a56SBarry Smith   PetscInt       *cols;
3252ba8c8a56SBarry Smith   PetscScalar    *vals;
32539c4fc4c7SBarry Smith 
32549c4fc4c7SBarry Smith   PetscFunctionBegin;
32559c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3256785e854fSJed Brown   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
32579c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32589c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
325926fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
32609c4fc4c7SBarry Smith   }
3261fdc842d1SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3262fdc842d1SBarry Smith   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3263fdc842d1SBarry Smith   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3264fdc842d1SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
32659c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
3266785e854fSJed Brown   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
32679c4fc4c7SBarry Smith   ii   = 0;
32689c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3269ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32700fd73130SBarry Smith     for (j=0; j<dof; j++) {
327126fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3272ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32739c4fc4c7SBarry Smith       ii++;
32749c4fc4c7SBarry Smith     }
3275ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32769c4fc4c7SBarry Smith   }
32779c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3278ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280ceb03754SKris Buschelman 
3281511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
328228be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3283c3d102feSKris Buschelman   } else {
3284ceb03754SKris Buschelman     *newmat = B;
3285c3d102feSKris Buschelman   }
32869c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32879c4fc4c7SBarry Smith }
32889c4fc4c7SBarry Smith 
3289c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3290be1d678aSKris Buschelman 
3291cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32920fd73130SBarry Smith {
32930fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3294ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32950fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32960fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
32970fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
32980fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
32990298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
33000298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
33010fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
33020fd73130SBarry Smith   PetscErrorCode ierr;
33030fd73130SBarry Smith   PetscScalar    *vals,*ovals;
33040fd73130SBarry Smith 
33050fd73130SBarry Smith   PetscFunctionBegin;
3306dcca6d9dSJed Brown   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3307d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33080fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
33090fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
33100fd73130SBarry Smith     for (j=0; j<dof; j++) {
33110fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
33120fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
33130fd73130SBarry Smith     }
33140fd73130SBarry Smith   }
3315fdc842d1SBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3316fdc842d1SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3317fdc842d1SBarry Smith   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3318fdc842d1SBarry Smith   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3319f27682edSJed Brown   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
33200fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33210fd73130SBarry Smith 
3322dcca6d9dSJed Brown   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3323d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3324d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33250fd73130SBarry Smith   garray = mpiaij->garray;
33260fd73130SBarry Smith 
33270fd73130SBarry Smith   ii = rstart;
3328d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33290fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33300fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33310fd73130SBarry Smith     for (j=0; j<dof; j++) {
33320fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33330fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33340fd73130SBarry Smith       }
33350fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33360fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33370fd73130SBarry Smith       }
3338ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3339ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33400fd73130SBarry Smith       ii++;
33410fd73130SBarry Smith     }
33420fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33430fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33440fd73130SBarry Smith   }
33450fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33460fd73130SBarry Smith 
3347ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349ceb03754SKris Buschelman 
3350511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
33517adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33527adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
335326fbe8dcSKarl Rupp 
335428be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
335526fbe8dcSKarl Rupp 
33567adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3357c3d102feSKris Buschelman   } else {
3358ceb03754SKris Buschelman     *newmat = B;
3359c3d102feSKris Buschelman   }
33600fd73130SBarry Smith   PetscFunctionReturn(0);
33610fd73130SBarry Smith }
33620fd73130SBarry Smith 
33637dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33649e516c8fSBarry Smith {
33659e516c8fSBarry Smith   PetscErrorCode ierr;
33669e516c8fSBarry Smith   Mat            A;
33679e516c8fSBarry Smith 
33689e516c8fSBarry Smith   PetscFunctionBegin;
33699e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33707dae84e0SHong Zhang   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33719e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33729e516c8fSBarry Smith   PetscFunctionReturn(0);
33739e516c8fSBarry Smith }
33740fd73130SBarry Smith 
3375ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3376ec626240SStefano Zampini {
3377ec626240SStefano Zampini   PetscErrorCode ierr;
3378ec626240SStefano Zampini   Mat            A;
3379ec626240SStefano Zampini 
3380ec626240SStefano Zampini   PetscFunctionBegin;
3381ec626240SStefano Zampini   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3382ec626240SStefano Zampini   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3383ec626240SStefano Zampini   ierr = MatDestroy(&A);CHKERRQ(ierr);
3384ec626240SStefano Zampini   PetscFunctionReturn(0);
3385ec626240SStefano Zampini }
3386ec626240SStefano Zampini 
3387bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3388480dffcfSJed Brown /*@
33890bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33900bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33910bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33920bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33930bad9183SKris Buschelman 
3394ff585edeSJed Brown   Collective
3395ff585edeSJed Brown 
3396ff585edeSJed Brown   Input Parameters:
3397ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3398ff585edeSJed Brown - dof - the block size (number of components per node)
3399ff585edeSJed Brown 
3400ff585edeSJed Brown   Output Parameter:
3401ff585edeSJed Brown . maij - the new MAIJ matrix
3402ff585edeSJed Brown 
34030bad9183SKris Buschelman   Operations provided:
34040fd73130SBarry Smith + MatMult
34050bad9183SKris Buschelman . MatMultTranspose
34060bad9183SKris Buschelman . MatMultAdd
34070bad9183SKris Buschelman . MatMultTransposeAdd
34080fd73130SBarry Smith - MatView
34090bad9183SKris Buschelman 
34100bad9183SKris Buschelman   Level: advanced
34110bad9183SKris Buschelman 
3412b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3413ff585edeSJed Brown @*/
34147087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
341582b1193eSBarry Smith {
3416dfbe8321SBarry Smith   PetscErrorCode ierr;
3417b24ad042SBarry Smith   PetscMPIInt    size;
3418b24ad042SBarry Smith   PetscInt       n;
341982b1193eSBarry Smith   Mat            B;
3420fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3421fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3422fdc842d1SBarry Smith   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3423fdc842d1SBarry Smith #endif
342482b1193eSBarry Smith 
342582b1193eSBarry Smith   PetscFunctionBegin;
3426fdc842d1SBarry Smith   dof  = PetscAbs(dof);
3427d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3428d72c5c08SBarry Smith 
342926fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
343026fbe8dcSKarl Rupp   else {
3431ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3432c248e2fdSStefano Zampini     /* propagate vec type */
3433c248e2fdSStefano Zampini     ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr);
3434d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3435bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3436bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3437bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3438bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
343926fbe8dcSKarl Rupp 
3440cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3441d72c5c08SBarry Smith 
3442ffc4695bSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3443f4a53059SBarry Smith     if (size == 1) {
3444feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3445feb9fe23SJed Brown 
3446b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
344726fbe8dcSKarl Rupp 
34480298fd71SBarry Smith       B->ops->setup   = NULL;
34494cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34500fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
34514222ddf1SHong Zhang 
3452feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3453b9b97703SBarry Smith       b->dof          = dof;
34544cb9d9b8SBarry Smith       b->AIJ          = A;
345526fbe8dcSKarl Rupp 
3456d72c5c08SBarry Smith       if (dof == 2) {
3457cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3458cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3459cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3460cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3461bcc973b7SBarry Smith       } else if (dof == 3) {
3462bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3463bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3464bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3465bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3466bcc973b7SBarry Smith       } else if (dof == 4) {
3467bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3468bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3469bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3470bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3471f9fae5adSBarry Smith       } else if (dof == 5) {
3472f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3473f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3474f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3475f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34766cd98798SBarry Smith       } else if (dof == 6) {
34776cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34786cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34796cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34806cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3481ed8eea03SBarry Smith       } else if (dof == 7) {
3482ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3483ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3484ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3485ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
348666ed3db0SBarry Smith       } else if (dof == 8) {
348766ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
348866ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
348966ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
349066ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34912b692628SMatthew Knepley       } else if (dof == 9) {
34922b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34932b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34942b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34952b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3496b9cda22cSBarry Smith       } else if (dof == 10) {
3497b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3498b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3499b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3500b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3501dbdd7285SBarry Smith       } else if (dof == 11) {
3502dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3503dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3504dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3505dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
35062f7816d4SBarry Smith       } else if (dof == 16) {
35072f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
35082f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
35092f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
35102f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3511ed1c418cSBarry Smith       } else if (dof == 18) {
3512ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3513ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3514ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3515ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
351682b1193eSBarry Smith       } else {
35176861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
35186861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
35196861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
35206861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
352182b1193eSBarry Smith       }
3522fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3523fdc842d1SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3524fdc842d1SBarry Smith #endif
3525bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
35264222ddf1SHong Zhang       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3527f4a53059SBarry Smith     } else {
3528f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3529feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3530f4a53059SBarry Smith       IS          from,to;
3531f4a53059SBarry Smith       Vec         gvec;
3532f4a53059SBarry Smith 
3533b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
353426fbe8dcSKarl Rupp 
35350298fd71SBarry Smith       B->ops->setup            = NULL;
3536d72c5c08SBarry Smith       B->ops->destroy          = MatDestroy_MPIMAIJ;
35370fd73130SBarry Smith       B->ops->view             = MatView_MPIMAIJ;
353826fbe8dcSKarl Rupp 
3539b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3540b9b97703SBarry Smith       b->dof = dof;
3541b9b97703SBarry Smith       b->A   = A;
354226fbe8dcSKarl Rupp 
3543fdc842d1SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3544fdc842d1SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
35454cb9d9b8SBarry Smith 
3546f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3547a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3548a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
354913288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3550a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3551f4a53059SBarry Smith 
3552f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3553ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3554f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3555f4a53059SBarry Smith 
3556f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3557ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3558f4a53059SBarry Smith 
3559f4a53059SBarry Smith       /* generate the scatter context */
35609448b7f1SJunchao Zhang       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3561f4a53059SBarry Smith 
35626bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35636bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35646bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3565f4a53059SBarry Smith 
3566f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
35674cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
35684cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
35694cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
357026fbe8dcSKarl Rupp 
3571fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3572fdc842d1SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3573fdc842d1SBarry Smith #endif
3574bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
35754222ddf1SHong Zhang       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3576f4a53059SBarry Smith     }
35777dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3578ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
35794994cf47SJed Brown     ierr = MatSetUp(B);CHKERRQ(ierr);
3580fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3581fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3582fdc842d1SBarry Smith     {
3583fdc842d1SBarry Smith       PetscBool flg;
3584fdc842d1SBarry Smith       if (convert) {
3585fdc842d1SBarry Smith         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3586fdc842d1SBarry Smith         if (flg) {
3587fdc842d1SBarry Smith           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3588fdc842d1SBarry Smith         }
3589fdc842d1SBarry Smith       }
3590fdc842d1SBarry Smith     }
3591fdc842d1SBarry Smith #endif
3592cd3bbe55SBarry Smith     *maij = B;
3593d72c5c08SBarry Smith   }
359482b1193eSBarry Smith   PetscFunctionReturn(0);
359582b1193eSBarry Smith }
3596