xref: /petsc/src/mat/impls/maij/maij.c (revision 2e956fe4fc852fabc23b437482e1fb7b77fddb0d)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38db781477SPatrick Sanan .seealso: `MatCreateMAIJ()`
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
43b9b97703SBarry Smith 
44b9b97703SBarry Smith   PetscFunctionBegin;
459566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij));
469566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij));
47b9b97703SBarry Smith   if (ismpimaij) {
48b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49b9b97703SBarry Smith 
50b9b97703SBarry Smith     *B = b->A;
51b9b97703SBarry Smith   } else if (isseqmaij) {
52b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53b9b97703SBarry Smith 
54b9b97703SBarry Smith     *B = b->AIJ;
55b9b97703SBarry Smith   } else {
56b9b97703SBarry Smith     *B = A;
57b9b97703SBarry Smith   }
58b9b97703SBarry Smith   PetscFunctionReturn(0);
59b9b97703SBarry Smith }
60b9b97703SBarry Smith 
61480dffcfSJed Brown /*@
62ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63ff585edeSJed Brown 
643f9fe445SBarry Smith    Logically Collective
65ff585edeSJed Brown 
66d8d19677SJose E. Roman    Input Parameters:
67ff585edeSJed Brown +  A - the MAIJ matrix
68ff585edeSJed Brown -  dof - the block size for the new matrix
69ff585edeSJed Brown 
70ff585edeSJed Brown    Output Parameter:
71ff585edeSJed Brown .  B - the new MAIJ matrix
72ff585edeSJed Brown 
73ff585edeSJed Brown    Level: advanced
74ff585edeSJed Brown 
75db781477SPatrick Sanan .seealso: `MatCreateMAIJ()`
76ff585edeSJed Brown @*/
777087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78b9b97703SBarry Smith {
790298fd71SBarry Smith   Mat            Aij = NULL;
80b9b97703SBarry Smith 
81b9b97703SBarry Smith   PetscFunctionBegin;
82c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
839566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A,&Aij));
849566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij,dof,B));
85b9b97703SBarry Smith   PetscFunctionReturn(0);
86b9b97703SBarry Smith }
87b9b97703SBarry Smith 
88dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
8982b1193eSBarry Smith {
904cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9182b1193eSBarry Smith 
9282b1193eSBarry Smith   PetscFunctionBegin;
939566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
949566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
95*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaijcusparse_C",NULL));
969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL));
979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL));
984cb9d9b8SBarry Smith   PetscFunctionReturn(0);
994cb9d9b8SBarry Smith }
1004cb9d9b8SBarry Smith 
101356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
102356c569eSBarry Smith {
103356c569eSBarry Smith   PetscFunctionBegin;
104ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
105356c569eSBarry Smith }
106356c569eSBarry Smith 
1070fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1080fd73130SBarry Smith {
1090fd73130SBarry Smith   Mat            B;
1100fd73130SBarry Smith 
1110fd73130SBarry Smith   PetscFunctionBegin;
1129566063dSJacob Faibussowitsch   PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1139566063dSJacob Faibussowitsch   PetscCall(MatView(B,viewer));
1149566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1150fd73130SBarry Smith   PetscFunctionReturn(0);
1160fd73130SBarry Smith }
1170fd73130SBarry Smith 
1180fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1190fd73130SBarry Smith {
1200fd73130SBarry Smith   Mat            B;
1210fd73130SBarry Smith 
1220fd73130SBarry Smith   PetscFunctionBegin;
1239566063dSJacob Faibussowitsch   PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
1249566063dSJacob Faibussowitsch   PetscCall(MatView(B,viewer));
1259566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1260fd73130SBarry Smith   PetscFunctionReturn(0);
1270fd73130SBarry Smith }
1280fd73130SBarry Smith 
129dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1304cb9d9b8SBarry Smith {
1314cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1324cb9d9b8SBarry Smith 
1334cb9d9b8SBarry Smith   PetscFunctionBegin;
1349566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1369566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1379566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1389566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1399566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
140*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaijcusparse_C",NULL));
1419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL));
1429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL));
1439566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
144b9b97703SBarry Smith   PetscFunctionReturn(0);
145b9b97703SBarry Smith }
146b9b97703SBarry Smith 
1470bad9183SKris Buschelman /*MC
148fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1490bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1500bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1510bad9183SKris Buschelman 
1520bad9183SKris Buschelman   Operations provided:
15367b8a455SSatish Balay .vb
15467b8a455SSatish Balay     MatMult
15567b8a455SSatish Balay     MatMultTranspose
15667b8a455SSatish Balay     MatMultAdd
15767b8a455SSatish Balay     MatMultTransposeAdd
15867b8a455SSatish Balay .ve
1590bad9183SKris Buschelman 
1600bad9183SKris Buschelman   Level: advanced
1610bad9183SKris Buschelman 
162db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1630bad9183SKris Buschelman M*/
1640bad9183SKris Buschelman 
1658cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
16682b1193eSBarry Smith {
1674cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
16864b52464SHong Zhang   PetscMPIInt    size;
16982b1193eSBarry Smith 
17082b1193eSBarry Smith   PetscFunctionBegin;
1719566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A,&b));
172b0a32e0cSBarry Smith   A->data  = (void*)b;
17326fbe8dcSKarl Rupp 
1749566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));
17526fbe8dcSKarl Rupp 
176356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
177f4a53059SBarry Smith 
178f4259b30SLisandro Dalcin   b->AIJ  = NULL;
179cd3bbe55SBarry Smith   b->dof  = 0;
180f4259b30SLisandro Dalcin   b->OAIJ = NULL;
181f4259b30SLisandro Dalcin   b->ctx  = NULL;
182f4259b30SLisandro Dalcin   b->w    = NULL;
1839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
18464b52464SHong Zhang   if (size == 1) {
1859566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ));
18664b52464SHong Zhang   } else {
1879566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ));
18864b52464SHong Zhang   }
18932e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19032e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19182b1193eSBarry Smith   PetscFunctionReturn(0);
19282b1193eSBarry Smith }
19382b1193eSBarry Smith 
194cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
195dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
19682b1193eSBarry Smith {
1974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
198bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
199d2840e09SBarry Smith   const PetscScalar *x,*v;
200d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
201d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
202d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
20382b1193eSBarry Smith 
204bcc973b7SBarry Smith   PetscFunctionBegin;
2059566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2069566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
207bcc973b7SBarry Smith   idx  = a->j;
208bcc973b7SBarry Smith   v    = a->a;
209bcc973b7SBarry Smith   ii   = a->i;
210bcc973b7SBarry Smith 
211bcc973b7SBarry Smith   for (i=0; i<m; i++) {
212bcc973b7SBarry Smith     jrow  = ii[i];
213bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
214bcc973b7SBarry Smith     sum1  = 0.0;
215bcc973b7SBarry Smith     sum2  = 0.0;
21626fbe8dcSKarl Rupp 
217b3c51c66SMatthew Knepley     nonzerorow += (n>0);
218bcc973b7SBarry Smith     for (j=0; j<n; j++) {
219bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
220bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
221bcc973b7SBarry Smith       jrow++;
222bcc973b7SBarry Smith     }
223bcc973b7SBarry Smith     y[2*i]   = sum1;
224bcc973b7SBarry Smith     y[2*i+1] = sum2;
225bcc973b7SBarry Smith   }
226bcc973b7SBarry Smith 
2279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow));
2289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
23082b1193eSBarry Smith   PetscFunctionReturn(0);
23182b1193eSBarry Smith }
232bcc973b7SBarry Smith 
233dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
23482b1193eSBarry Smith {
2354cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
236bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
237d2840e09SBarry Smith   const PetscScalar *x,*v;
238d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
239d2840e09SBarry Smith   PetscInt          n,i;
240d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
24182b1193eSBarry Smith 
242bcc973b7SBarry Smith   PetscFunctionBegin;
2439566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
2449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2459566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
246bfec09a0SHong Zhang 
247bcc973b7SBarry Smith   for (i=0; i<m; i++) {
248bfec09a0SHong Zhang     idx    = a->j + a->i[i];
249bfec09a0SHong Zhang     v      = a->a + a->i[i];
250bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
251bcc973b7SBarry Smith     alpha1 = x[2*i];
252bcc973b7SBarry Smith     alpha2 = x[2*i+1];
25326fbe8dcSKarl Rupp     while (n-->0) {
25426fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
25526fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
25626fbe8dcSKarl Rupp       idx++; v++;
25726fbe8dcSKarl Rupp     }
258bcc973b7SBarry Smith   }
2599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
2609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
26282b1193eSBarry Smith   PetscFunctionReturn(0);
26382b1193eSBarry Smith }
264bcc973b7SBarry Smith 
265dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
26682b1193eSBarry Smith {
2674cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
268bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
269d2840e09SBarry Smith   const PetscScalar *x,*v;
270d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
271b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
272d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27382b1193eSBarry Smith 
274bcc973b7SBarry Smith   PetscFunctionBegin;
2759566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
2769566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2779566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
278bcc973b7SBarry Smith   idx  = a->j;
279bcc973b7SBarry Smith   v    = a->a;
280bcc973b7SBarry Smith   ii   = a->i;
281bcc973b7SBarry Smith 
282bcc973b7SBarry Smith   for (i=0; i<m; i++) {
283bcc973b7SBarry Smith     jrow = ii[i];
284bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
285bcc973b7SBarry Smith     sum1 = 0.0;
286bcc973b7SBarry Smith     sum2 = 0.0;
287bcc973b7SBarry Smith     for (j=0; j<n; j++) {
288bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
289bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
290bcc973b7SBarry Smith       jrow++;
291bcc973b7SBarry Smith     }
292bcc973b7SBarry Smith     y[2*i]   += sum1;
293bcc973b7SBarry Smith     y[2*i+1] += sum2;
294bcc973b7SBarry Smith   }
295bcc973b7SBarry Smith 
2969566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
2979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
299bcc973b7SBarry Smith   PetscFunctionReturn(0);
30082b1193eSBarry Smith }
301dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
30282b1193eSBarry Smith {
3034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
304bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
305d2840e09SBarry Smith   const PetscScalar *x,*v;
306d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
307d2840e09SBarry Smith   PetscInt          n,i;
308d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
30982b1193eSBarry Smith 
310bcc973b7SBarry Smith   PetscFunctionBegin;
3119566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
3129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3139566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
314bfec09a0SHong Zhang 
315bcc973b7SBarry Smith   for (i=0; i<m; i++) {
316bfec09a0SHong Zhang     idx    = a->j + a->i[i];
317bfec09a0SHong Zhang     v      = a->a + a->i[i];
318bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
319bcc973b7SBarry Smith     alpha1 = x[2*i];
320bcc973b7SBarry Smith     alpha2 = x[2*i+1];
32126fbe8dcSKarl Rupp     while (n-->0) {
32226fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
32326fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
32426fbe8dcSKarl Rupp       idx++; v++;
32526fbe8dcSKarl Rupp     }
326bcc973b7SBarry Smith   }
3279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
3289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
330bcc973b7SBarry Smith   PetscFunctionReturn(0);
33182b1193eSBarry Smith }
332cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
333dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
334bcc973b7SBarry Smith {
3354cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
336bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
337d2840e09SBarry Smith   const PetscScalar *x,*v;
338d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
339d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
340d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
34182b1193eSBarry Smith 
342bcc973b7SBarry Smith   PetscFunctionBegin;
3439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
345bcc973b7SBarry Smith   idx  = a->j;
346bcc973b7SBarry Smith   v    = a->a;
347bcc973b7SBarry Smith   ii   = a->i;
348bcc973b7SBarry Smith 
349bcc973b7SBarry Smith   for (i=0; i<m; i++) {
350bcc973b7SBarry Smith     jrow  = ii[i];
351bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
352bcc973b7SBarry Smith     sum1  = 0.0;
353bcc973b7SBarry Smith     sum2  = 0.0;
354bcc973b7SBarry Smith     sum3  = 0.0;
35526fbe8dcSKarl Rupp 
356b3c51c66SMatthew Knepley     nonzerorow += (n>0);
357bcc973b7SBarry Smith     for (j=0; j<n; j++) {
358bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
359bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
360bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
361bcc973b7SBarry Smith       jrow++;
362bcc973b7SBarry Smith      }
363bcc973b7SBarry Smith     y[3*i]   = sum1;
364bcc973b7SBarry Smith     y[3*i+1] = sum2;
365bcc973b7SBarry Smith     y[3*i+2] = sum3;
366bcc973b7SBarry Smith   }
367bcc973b7SBarry Smith 
3689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow));
3699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
371bcc973b7SBarry Smith   PetscFunctionReturn(0);
372bcc973b7SBarry Smith }
373bcc973b7SBarry Smith 
374dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
375bcc973b7SBarry Smith {
3764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
377bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
378d2840e09SBarry Smith   const PetscScalar *x,*v;
379d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
380d2840e09SBarry Smith   PetscInt          n,i;
381d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
382bcc973b7SBarry Smith 
383bcc973b7SBarry Smith   PetscFunctionBegin;
3849566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
3859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
387bfec09a0SHong Zhang 
388bcc973b7SBarry Smith   for (i=0; i<m; i++) {
389bfec09a0SHong Zhang     idx    = a->j + a->i[i];
390bfec09a0SHong Zhang     v      = a->a + a->i[i];
391bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
392bcc973b7SBarry Smith     alpha1 = x[3*i];
393bcc973b7SBarry Smith     alpha2 = x[3*i+1];
394bcc973b7SBarry Smith     alpha3 = x[3*i+2];
395bcc973b7SBarry Smith     while (n-->0) {
396bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
397bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
398bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
399bcc973b7SBarry Smith       idx++; v++;
400bcc973b7SBarry Smith     }
401bcc973b7SBarry Smith   }
4029566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
405bcc973b7SBarry Smith   PetscFunctionReturn(0);
406bcc973b7SBarry Smith }
407bcc973b7SBarry Smith 
408dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
409bcc973b7SBarry Smith {
4104cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
411bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
412d2840e09SBarry Smith   const PetscScalar *x,*v;
413d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
414b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
415d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
416bcc973b7SBarry Smith 
417bcc973b7SBarry Smith   PetscFunctionBegin;
4189566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
4199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4209566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
421bcc973b7SBarry Smith   idx  = a->j;
422bcc973b7SBarry Smith   v    = a->a;
423bcc973b7SBarry Smith   ii   = a->i;
424bcc973b7SBarry Smith 
425bcc973b7SBarry Smith   for (i=0; i<m; i++) {
426bcc973b7SBarry Smith     jrow = ii[i];
427bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
428bcc973b7SBarry Smith     sum1 = 0.0;
429bcc973b7SBarry Smith     sum2 = 0.0;
430bcc973b7SBarry Smith     sum3 = 0.0;
431bcc973b7SBarry Smith     for (j=0; j<n; j++) {
432bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
433bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
434bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
435bcc973b7SBarry Smith       jrow++;
436bcc973b7SBarry Smith     }
437bcc973b7SBarry Smith     y[3*i]   += sum1;
438bcc973b7SBarry Smith     y[3*i+1] += sum2;
439bcc973b7SBarry Smith     y[3*i+2] += sum3;
440bcc973b7SBarry Smith   }
441bcc973b7SBarry Smith 
4429566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
445bcc973b7SBarry Smith   PetscFunctionReturn(0);
446bcc973b7SBarry Smith }
447dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
448bcc973b7SBarry Smith {
4494cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
450bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
451d2840e09SBarry Smith   const PetscScalar *x,*v;
452d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
453d2840e09SBarry Smith   PetscInt          n,i;
454d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
455bcc973b7SBarry Smith 
456bcc973b7SBarry Smith   PetscFunctionBegin;
4579566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
4589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
460bcc973b7SBarry Smith   for (i=0; i<m; i++) {
461bfec09a0SHong Zhang     idx    = a->j + a->i[i];
462bfec09a0SHong Zhang     v      = a->a + a->i[i];
463bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
464bcc973b7SBarry Smith     alpha1 = x[3*i];
465bcc973b7SBarry Smith     alpha2 = x[3*i+1];
466bcc973b7SBarry Smith     alpha3 = x[3*i+2];
467bcc973b7SBarry Smith     while (n-->0) {
468bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
469bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
470bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
471bcc973b7SBarry Smith       idx++; v++;
472bcc973b7SBarry Smith     }
473bcc973b7SBarry Smith   }
4749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
477bcc973b7SBarry Smith   PetscFunctionReturn(0);
478bcc973b7SBarry Smith }
479bcc973b7SBarry Smith 
480bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
481dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
482bcc973b7SBarry Smith {
4834cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
484bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
485d2840e09SBarry Smith   const PetscScalar *x,*v;
486d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
487d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
488d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
489bcc973b7SBarry Smith 
490bcc973b7SBarry Smith   PetscFunctionBegin;
4919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
493bcc973b7SBarry Smith   idx  = a->j;
494bcc973b7SBarry Smith   v    = a->a;
495bcc973b7SBarry Smith   ii   = a->i;
496bcc973b7SBarry Smith 
497bcc973b7SBarry Smith   for (i=0; i<m; i++) {
498bcc973b7SBarry Smith     jrow        = ii[i];
499bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
500bcc973b7SBarry Smith     sum1        = 0.0;
501bcc973b7SBarry Smith     sum2        = 0.0;
502bcc973b7SBarry Smith     sum3        = 0.0;
503bcc973b7SBarry Smith     sum4        = 0.0;
504b3c51c66SMatthew Knepley     nonzerorow += (n>0);
505bcc973b7SBarry Smith     for (j=0; j<n; j++) {
506bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
507bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
508bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
509bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
510bcc973b7SBarry Smith       jrow++;
511bcc973b7SBarry Smith     }
512bcc973b7SBarry Smith     y[4*i]   = sum1;
513bcc973b7SBarry Smith     y[4*i+1] = sum2;
514bcc973b7SBarry Smith     y[4*i+2] = sum3;
515bcc973b7SBarry Smith     y[4*i+3] = sum4;
516bcc973b7SBarry Smith   }
517bcc973b7SBarry Smith 
5189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow));
5199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
521bcc973b7SBarry Smith   PetscFunctionReturn(0);
522bcc973b7SBarry Smith }
523bcc973b7SBarry Smith 
524dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
525bcc973b7SBarry Smith {
5264cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
527bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
528d2840e09SBarry Smith   const PetscScalar *x,*v;
529d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
530d2840e09SBarry Smith   PetscInt          n,i;
531d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
532bcc973b7SBarry Smith 
533bcc973b7SBarry Smith   PetscFunctionBegin;
5349566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
5359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
537bcc973b7SBarry Smith   for (i=0; i<m; i++) {
538bfec09a0SHong Zhang     idx    = a->j + a->i[i];
539bfec09a0SHong Zhang     v      = a->a + a->i[i];
540bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
541bcc973b7SBarry Smith     alpha1 = x[4*i];
542bcc973b7SBarry Smith     alpha2 = x[4*i+1];
543bcc973b7SBarry Smith     alpha3 = x[4*i+2];
544bcc973b7SBarry Smith     alpha4 = x[4*i+3];
545bcc973b7SBarry Smith     while (n-->0) {
546bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
547bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
548bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
549bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
550bcc973b7SBarry Smith       idx++; v++;
551bcc973b7SBarry Smith     }
552bcc973b7SBarry Smith   }
5539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
5549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
556bcc973b7SBarry Smith   PetscFunctionReturn(0);
557bcc973b7SBarry Smith }
558bcc973b7SBarry Smith 
559dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
560bcc973b7SBarry Smith {
5614cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
562f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
563d2840e09SBarry Smith   const PetscScalar *x,*v;
564d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
565b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
566d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
567f9fae5adSBarry Smith 
568f9fae5adSBarry Smith   PetscFunctionBegin;
5699566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
5709566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5719566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
572f9fae5adSBarry Smith   idx  = a->j;
573f9fae5adSBarry Smith   v    = a->a;
574f9fae5adSBarry Smith   ii   = a->i;
575f9fae5adSBarry Smith 
576f9fae5adSBarry Smith   for (i=0; i<m; i++) {
577f9fae5adSBarry Smith     jrow = ii[i];
578f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
579f9fae5adSBarry Smith     sum1 = 0.0;
580f9fae5adSBarry Smith     sum2 = 0.0;
581f9fae5adSBarry Smith     sum3 = 0.0;
582f9fae5adSBarry Smith     sum4 = 0.0;
583f9fae5adSBarry Smith     for (j=0; j<n; j++) {
584f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
585f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
586f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
587f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
588f9fae5adSBarry Smith       jrow++;
589f9fae5adSBarry Smith     }
590f9fae5adSBarry Smith     y[4*i]   += sum1;
591f9fae5adSBarry Smith     y[4*i+1] += sum2;
592f9fae5adSBarry Smith     y[4*i+2] += sum3;
593f9fae5adSBarry Smith     y[4*i+3] += sum4;
594f9fae5adSBarry Smith   }
595f9fae5adSBarry Smith 
5969566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
5979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
599f9fae5adSBarry Smith   PetscFunctionReturn(0);
600bcc973b7SBarry Smith }
601dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
602bcc973b7SBarry Smith {
6034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
604f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
605d2840e09SBarry Smith   const PetscScalar *x,*v;
606d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
607d2840e09SBarry Smith   PetscInt          n,i;
608d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
609f9fae5adSBarry Smith 
610f9fae5adSBarry Smith   PetscFunctionBegin;
6119566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
6129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6139566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
614bfec09a0SHong Zhang 
615f9fae5adSBarry Smith   for (i=0; i<m; i++) {
616bfec09a0SHong Zhang     idx    = a->j + a->i[i];
617bfec09a0SHong Zhang     v      = a->a + a->i[i];
618f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
619f9fae5adSBarry Smith     alpha1 = x[4*i];
620f9fae5adSBarry Smith     alpha2 = x[4*i+1];
621f9fae5adSBarry Smith     alpha3 = x[4*i+2];
622f9fae5adSBarry Smith     alpha4 = x[4*i+3];
623f9fae5adSBarry Smith     while (n-->0) {
624f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
625f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
626f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
627f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
628f9fae5adSBarry Smith       idx++; v++;
629f9fae5adSBarry Smith     }
630f9fae5adSBarry Smith   }
6319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
6329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
634f9fae5adSBarry Smith   PetscFunctionReturn(0);
635f9fae5adSBarry Smith }
636f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6376cd98798SBarry Smith 
638dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
639f9fae5adSBarry Smith {
6404cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
641f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
642d2840e09SBarry Smith   const PetscScalar *x,*v;
643d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
644d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
645d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
646f9fae5adSBarry Smith 
647f9fae5adSBarry Smith   PetscFunctionBegin;
6489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
650f9fae5adSBarry Smith   idx  = a->j;
651f9fae5adSBarry Smith   v    = a->a;
652f9fae5adSBarry Smith   ii   = a->i;
653f9fae5adSBarry Smith 
654f9fae5adSBarry Smith   for (i=0; i<m; i++) {
655f9fae5adSBarry Smith     jrow  = ii[i];
656f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
657f9fae5adSBarry Smith     sum1  = 0.0;
658f9fae5adSBarry Smith     sum2  = 0.0;
659f9fae5adSBarry Smith     sum3  = 0.0;
660f9fae5adSBarry Smith     sum4  = 0.0;
661f9fae5adSBarry Smith     sum5  = 0.0;
66226fbe8dcSKarl Rupp 
663b3c51c66SMatthew Knepley     nonzerorow += (n>0);
664f9fae5adSBarry Smith     for (j=0; j<n; j++) {
665f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
666f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
667f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
668f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
669f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
670f9fae5adSBarry Smith       jrow++;
671f9fae5adSBarry Smith     }
672f9fae5adSBarry Smith     y[5*i]   = sum1;
673f9fae5adSBarry Smith     y[5*i+1] = sum2;
674f9fae5adSBarry Smith     y[5*i+2] = sum3;
675f9fae5adSBarry Smith     y[5*i+3] = sum4;
676f9fae5adSBarry Smith     y[5*i+4] = sum5;
677f9fae5adSBarry Smith   }
678f9fae5adSBarry Smith 
6799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow));
6809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
682f9fae5adSBarry Smith   PetscFunctionReturn(0);
683f9fae5adSBarry Smith }
684f9fae5adSBarry Smith 
685dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
686f9fae5adSBarry Smith {
6874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
688f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
689d2840e09SBarry Smith   const PetscScalar *x,*v;
690d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
691d2840e09SBarry Smith   PetscInt          n,i;
692d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
693f9fae5adSBarry Smith 
694f9fae5adSBarry Smith   PetscFunctionBegin;
6959566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
6969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
698bfec09a0SHong Zhang 
699f9fae5adSBarry Smith   for (i=0; i<m; i++) {
700bfec09a0SHong Zhang     idx    = a->j + a->i[i];
701bfec09a0SHong Zhang     v      = a->a + a->i[i];
702f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
703f9fae5adSBarry Smith     alpha1 = x[5*i];
704f9fae5adSBarry Smith     alpha2 = x[5*i+1];
705f9fae5adSBarry Smith     alpha3 = x[5*i+2];
706f9fae5adSBarry Smith     alpha4 = x[5*i+3];
707f9fae5adSBarry Smith     alpha5 = x[5*i+4];
708f9fae5adSBarry Smith     while (n-->0) {
709f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
710f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
711f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
712f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
713f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
714f9fae5adSBarry Smith       idx++; v++;
715f9fae5adSBarry Smith     }
716f9fae5adSBarry Smith   }
7179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
7189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
7199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
720f9fae5adSBarry Smith   PetscFunctionReturn(0);
721f9fae5adSBarry Smith }
722f9fae5adSBarry Smith 
723dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
724f9fae5adSBarry Smith {
7254cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
726f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
727d2840e09SBarry Smith   const PetscScalar *x,*v;
728d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
729b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
730d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
731f9fae5adSBarry Smith 
732f9fae5adSBarry Smith   PetscFunctionBegin;
7339566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
7349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
7359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
736f9fae5adSBarry Smith   idx  = a->j;
737f9fae5adSBarry Smith   v    = a->a;
738f9fae5adSBarry Smith   ii   = a->i;
739f9fae5adSBarry Smith 
740f9fae5adSBarry Smith   for (i=0; i<m; i++) {
741f9fae5adSBarry Smith     jrow = ii[i];
742f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
743f9fae5adSBarry Smith     sum1 = 0.0;
744f9fae5adSBarry Smith     sum2 = 0.0;
745f9fae5adSBarry Smith     sum3 = 0.0;
746f9fae5adSBarry Smith     sum4 = 0.0;
747f9fae5adSBarry Smith     sum5 = 0.0;
748f9fae5adSBarry Smith     for (j=0; j<n; j++) {
749f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
750f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
751f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
752f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
753f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
754f9fae5adSBarry Smith       jrow++;
755f9fae5adSBarry Smith     }
756f9fae5adSBarry Smith     y[5*i]   += sum1;
757f9fae5adSBarry Smith     y[5*i+1] += sum2;
758f9fae5adSBarry Smith     y[5*i+2] += sum3;
759f9fae5adSBarry Smith     y[5*i+3] += sum4;
760f9fae5adSBarry Smith     y[5*i+4] += sum5;
761f9fae5adSBarry Smith   }
762f9fae5adSBarry Smith 
7639566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
7649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
7659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
766f9fae5adSBarry Smith   PetscFunctionReturn(0);
767f9fae5adSBarry Smith }
768f9fae5adSBarry Smith 
769dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
770f9fae5adSBarry Smith {
7714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
772f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
773d2840e09SBarry Smith   const PetscScalar *x,*v;
774d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
775d2840e09SBarry Smith   PetscInt          n,i;
776d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
777f9fae5adSBarry Smith 
778f9fae5adSBarry Smith   PetscFunctionBegin;
7799566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
7809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
7819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
782bfec09a0SHong Zhang 
783f9fae5adSBarry Smith   for (i=0; i<m; i++) {
784bfec09a0SHong Zhang     idx    = a->j + a->i[i];
785bfec09a0SHong Zhang     v      = a->a + a->i[i];
786f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
787f9fae5adSBarry Smith     alpha1 = x[5*i];
788f9fae5adSBarry Smith     alpha2 = x[5*i+1];
789f9fae5adSBarry Smith     alpha3 = x[5*i+2];
790f9fae5adSBarry Smith     alpha4 = x[5*i+3];
791f9fae5adSBarry Smith     alpha5 = x[5*i+4];
792f9fae5adSBarry Smith     while (n-->0) {
793f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
794f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
795f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
796f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
797f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
798f9fae5adSBarry Smith       idx++; v++;
799f9fae5adSBarry Smith     }
800f9fae5adSBarry Smith   }
8019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
8029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
804f9fae5adSBarry Smith   PetscFunctionReturn(0);
805bcc973b7SBarry Smith }
806bcc973b7SBarry Smith 
8076cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
808dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8096cd98798SBarry Smith {
8106cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8116cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
812d2840e09SBarry Smith   const PetscScalar *x,*v;
813d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
814d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
815d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8166cd98798SBarry Smith 
8176cd98798SBarry Smith   PetscFunctionBegin;
8189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
8199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
8206cd98798SBarry Smith   idx  = a->j;
8216cd98798SBarry Smith   v    = a->a;
8226cd98798SBarry Smith   ii   = a->i;
8236cd98798SBarry Smith 
8246cd98798SBarry Smith   for (i=0; i<m; i++) {
8256cd98798SBarry Smith     jrow  = ii[i];
8266cd98798SBarry Smith     n     = ii[i+1] - jrow;
8276cd98798SBarry Smith     sum1  = 0.0;
8286cd98798SBarry Smith     sum2  = 0.0;
8296cd98798SBarry Smith     sum3  = 0.0;
8306cd98798SBarry Smith     sum4  = 0.0;
8316cd98798SBarry Smith     sum5  = 0.0;
8326cd98798SBarry Smith     sum6  = 0.0;
83326fbe8dcSKarl Rupp 
834b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8356cd98798SBarry Smith     for (j=0; j<n; j++) {
8366cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8376cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8386cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8396cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8406cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8416cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8426cd98798SBarry Smith       jrow++;
8436cd98798SBarry Smith     }
8446cd98798SBarry Smith     y[6*i]   = sum1;
8456cd98798SBarry Smith     y[6*i+1] = sum2;
8466cd98798SBarry Smith     y[6*i+2] = sum3;
8476cd98798SBarry Smith     y[6*i+3] = sum4;
8486cd98798SBarry Smith     y[6*i+4] = sum5;
8496cd98798SBarry Smith     y[6*i+5] = sum6;
8506cd98798SBarry Smith   }
8516cd98798SBarry Smith 
8529566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow));
8539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
8556cd98798SBarry Smith   PetscFunctionReturn(0);
8566cd98798SBarry Smith }
8576cd98798SBarry Smith 
858dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8596cd98798SBarry Smith {
8606cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8616cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
862d2840e09SBarry Smith   const PetscScalar *x,*v;
863d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
864d2840e09SBarry Smith   PetscInt          n,i;
865d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8666cd98798SBarry Smith 
8676cd98798SBarry Smith   PetscFunctionBegin;
8689566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
8699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
8709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
871bfec09a0SHong Zhang 
8726cd98798SBarry Smith   for (i=0; i<m; i++) {
873bfec09a0SHong Zhang     idx    = a->j + a->i[i];
874bfec09a0SHong Zhang     v      = a->a + a->i[i];
8756cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8766cd98798SBarry Smith     alpha1 = x[6*i];
8776cd98798SBarry Smith     alpha2 = x[6*i+1];
8786cd98798SBarry Smith     alpha3 = x[6*i+2];
8796cd98798SBarry Smith     alpha4 = x[6*i+3];
8806cd98798SBarry Smith     alpha5 = x[6*i+4];
8816cd98798SBarry Smith     alpha6 = x[6*i+5];
8826cd98798SBarry Smith     while (n-->0) {
8836cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8846cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8856cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8866cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8876cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8886cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8896cd98798SBarry Smith       idx++; v++;
8906cd98798SBarry Smith     }
8916cd98798SBarry Smith   }
8929566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
8939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
8956cd98798SBarry Smith   PetscFunctionReturn(0);
8966cd98798SBarry Smith }
8976cd98798SBarry Smith 
898dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8996cd98798SBarry Smith {
9006cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9016cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
902d2840e09SBarry Smith   const PetscScalar *x,*v;
903d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
904b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
905d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9066cd98798SBarry Smith 
9076cd98798SBarry Smith   PetscFunctionBegin;
9089566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
9099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
9116cd98798SBarry Smith   idx  = a->j;
9126cd98798SBarry Smith   v    = a->a;
9136cd98798SBarry Smith   ii   = a->i;
9146cd98798SBarry Smith 
9156cd98798SBarry Smith   for (i=0; i<m; i++) {
9166cd98798SBarry Smith     jrow = ii[i];
9176cd98798SBarry Smith     n    = ii[i+1] - jrow;
9186cd98798SBarry Smith     sum1 = 0.0;
9196cd98798SBarry Smith     sum2 = 0.0;
9206cd98798SBarry Smith     sum3 = 0.0;
9216cd98798SBarry Smith     sum4 = 0.0;
9226cd98798SBarry Smith     sum5 = 0.0;
9236cd98798SBarry Smith     sum6 = 0.0;
9246cd98798SBarry Smith     for (j=0; j<n; j++) {
9256cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9266cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9276cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9286cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9296cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9306cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9316cd98798SBarry Smith       jrow++;
9326cd98798SBarry Smith     }
9336cd98798SBarry Smith     y[6*i]   += sum1;
9346cd98798SBarry Smith     y[6*i+1] += sum2;
9356cd98798SBarry Smith     y[6*i+2] += sum3;
9366cd98798SBarry Smith     y[6*i+3] += sum4;
9376cd98798SBarry Smith     y[6*i+4] += sum5;
9386cd98798SBarry Smith     y[6*i+5] += sum6;
9396cd98798SBarry Smith   }
9406cd98798SBarry Smith 
9419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
9429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
9439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
9446cd98798SBarry Smith   PetscFunctionReturn(0);
9456cd98798SBarry Smith }
9466cd98798SBarry Smith 
947dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9486cd98798SBarry Smith {
9496cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9506cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
951d2840e09SBarry Smith   const PetscScalar *x,*v;
952d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
953d2840e09SBarry Smith   PetscInt          n,i;
954d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9556cd98798SBarry Smith 
9566cd98798SBarry Smith   PetscFunctionBegin;
9579566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
9589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
960bfec09a0SHong Zhang 
9616cd98798SBarry Smith   for (i=0; i<m; i++) {
962bfec09a0SHong Zhang     idx    = a->j + a->i[i];
963bfec09a0SHong Zhang     v      = a->a + a->i[i];
9646cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9656cd98798SBarry Smith     alpha1 = x[6*i];
9666cd98798SBarry Smith     alpha2 = x[6*i+1];
9676cd98798SBarry Smith     alpha3 = x[6*i+2];
9686cd98798SBarry Smith     alpha4 = x[6*i+3];
9696cd98798SBarry Smith     alpha5 = x[6*i+4];
9706cd98798SBarry Smith     alpha6 = x[6*i+5];
9716cd98798SBarry Smith     while (n-->0) {
9726cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9736cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9746cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9756cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9766cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9776cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9786cd98798SBarry Smith       idx++; v++;
9796cd98798SBarry Smith     }
9806cd98798SBarry Smith   }
9819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
9829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
9839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
9846cd98798SBarry Smith   PetscFunctionReturn(0);
9856cd98798SBarry Smith }
9866cd98798SBarry Smith 
98766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
988ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989ed8eea03SBarry Smith {
990ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
991ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
992d2840e09SBarry Smith   const PetscScalar *x,*v;
993d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
994d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
995d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
996ed8eea03SBarry Smith 
997ed8eea03SBarry Smith   PetscFunctionBegin;
9989566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9999566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1000ed8eea03SBarry Smith   idx  = a->j;
1001ed8eea03SBarry Smith   v    = a->a;
1002ed8eea03SBarry Smith   ii   = a->i;
1003ed8eea03SBarry Smith 
1004ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1005ed8eea03SBarry Smith     jrow  = ii[i];
1006ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1007ed8eea03SBarry Smith     sum1  = 0.0;
1008ed8eea03SBarry Smith     sum2  = 0.0;
1009ed8eea03SBarry Smith     sum3  = 0.0;
1010ed8eea03SBarry Smith     sum4  = 0.0;
1011ed8eea03SBarry Smith     sum5  = 0.0;
1012ed8eea03SBarry Smith     sum6  = 0.0;
1013ed8eea03SBarry Smith     sum7  = 0.0;
101426fbe8dcSKarl Rupp 
1015b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1016ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1017ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1018ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1019ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1020ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1021ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1022ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1023ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1024ed8eea03SBarry Smith       jrow++;
1025ed8eea03SBarry Smith     }
1026ed8eea03SBarry Smith     y[7*i]   = sum1;
1027ed8eea03SBarry Smith     y[7*i+1] = sum2;
1028ed8eea03SBarry Smith     y[7*i+2] = sum3;
1029ed8eea03SBarry Smith     y[7*i+3] = sum4;
1030ed8eea03SBarry Smith     y[7*i+4] = sum5;
1031ed8eea03SBarry Smith     y[7*i+5] = sum6;
1032ed8eea03SBarry Smith     y[7*i+6] = sum7;
1033ed8eea03SBarry Smith   }
1034ed8eea03SBarry Smith 
10359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow));
10369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
10379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1038ed8eea03SBarry Smith   PetscFunctionReturn(0);
1039ed8eea03SBarry Smith }
1040ed8eea03SBarry Smith 
1041ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042ed8eea03SBarry Smith {
1043ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1044ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1045d2840e09SBarry Smith   const PetscScalar *x,*v;
1046d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1047d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1048d2840e09SBarry Smith   PetscInt          n,i;
1049ed8eea03SBarry Smith 
1050ed8eea03SBarry Smith   PetscFunctionBegin;
10519566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
10529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
10539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1054ed8eea03SBarry Smith 
1055ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1056ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1057ed8eea03SBarry Smith     v      = a->a + a->i[i];
1058ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1059ed8eea03SBarry Smith     alpha1 = x[7*i];
1060ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1061ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1062ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1063ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1064ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1065ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1066ed8eea03SBarry Smith     while (n-->0) {
1067ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1068ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1069ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1070ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1071ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1072ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1073ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1074ed8eea03SBarry Smith       idx++; v++;
1075ed8eea03SBarry Smith     }
1076ed8eea03SBarry Smith   }
10779566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
10789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
10799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1080ed8eea03SBarry Smith   PetscFunctionReturn(0);
1081ed8eea03SBarry Smith }
1082ed8eea03SBarry Smith 
1083ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1084ed8eea03SBarry Smith {
1085ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1086ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1087d2840e09SBarry Smith   const PetscScalar *x,*v;
1088d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1090ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1091ed8eea03SBarry Smith 
1092ed8eea03SBarry Smith   PetscFunctionBegin;
10939566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
10949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
10959566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1096ed8eea03SBarry Smith   idx  = a->j;
1097ed8eea03SBarry Smith   v    = a->a;
1098ed8eea03SBarry Smith   ii   = a->i;
1099ed8eea03SBarry Smith 
1100ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1101ed8eea03SBarry Smith     jrow = ii[i];
1102ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1103ed8eea03SBarry Smith     sum1 = 0.0;
1104ed8eea03SBarry Smith     sum2 = 0.0;
1105ed8eea03SBarry Smith     sum3 = 0.0;
1106ed8eea03SBarry Smith     sum4 = 0.0;
1107ed8eea03SBarry Smith     sum5 = 0.0;
1108ed8eea03SBarry Smith     sum6 = 0.0;
1109ed8eea03SBarry Smith     sum7 = 0.0;
1110ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1111ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1112ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1113ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1114ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1115ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1116ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1117ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1118ed8eea03SBarry Smith       jrow++;
1119ed8eea03SBarry Smith     }
1120ed8eea03SBarry Smith     y[7*i]   += sum1;
1121ed8eea03SBarry Smith     y[7*i+1] += sum2;
1122ed8eea03SBarry Smith     y[7*i+2] += sum3;
1123ed8eea03SBarry Smith     y[7*i+3] += sum4;
1124ed8eea03SBarry Smith     y[7*i+4] += sum5;
1125ed8eea03SBarry Smith     y[7*i+5] += sum6;
1126ed8eea03SBarry Smith     y[7*i+6] += sum7;
1127ed8eea03SBarry Smith   }
1128ed8eea03SBarry Smith 
11299566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
11309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
11319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1132ed8eea03SBarry Smith   PetscFunctionReturn(0);
1133ed8eea03SBarry Smith }
1134ed8eea03SBarry Smith 
1135ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1136ed8eea03SBarry Smith {
1137ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1138ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1139d2840e09SBarry Smith   const PetscScalar *x,*v;
1140d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1141d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1142d2840e09SBarry Smith   PetscInt          n,i;
1143ed8eea03SBarry Smith 
1144ed8eea03SBarry Smith   PetscFunctionBegin;
11459566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
11469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
11479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1148ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1149ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1150ed8eea03SBarry Smith     v      = a->a + a->i[i];
1151ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1152ed8eea03SBarry Smith     alpha1 = x[7*i];
1153ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1154ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1155ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1156ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1157ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1158ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1159ed8eea03SBarry Smith     while (n-->0) {
1160ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1161ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1162ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1163ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1164ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1165ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1166ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1167ed8eea03SBarry Smith       idx++; v++;
1168ed8eea03SBarry Smith     }
1169ed8eea03SBarry Smith   }
11709566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
11719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
11729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1173ed8eea03SBarry Smith   PetscFunctionReturn(0);
1174ed8eea03SBarry Smith }
1175ed8eea03SBarry Smith 
1176dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
117766ed3db0SBarry Smith {
117866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
117966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1180d2840e09SBarry Smith   const PetscScalar *x,*v;
1181d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1182d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1183d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
118466ed3db0SBarry Smith 
118566ed3db0SBarry Smith   PetscFunctionBegin;
11869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
11879566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
118866ed3db0SBarry Smith   idx  = a->j;
118966ed3db0SBarry Smith   v    = a->a;
119066ed3db0SBarry Smith   ii   = a->i;
119166ed3db0SBarry Smith 
119266ed3db0SBarry Smith   for (i=0; i<m; i++) {
119366ed3db0SBarry Smith     jrow  = ii[i];
119466ed3db0SBarry Smith     n     = ii[i+1] - jrow;
119566ed3db0SBarry Smith     sum1  = 0.0;
119666ed3db0SBarry Smith     sum2  = 0.0;
119766ed3db0SBarry Smith     sum3  = 0.0;
119866ed3db0SBarry Smith     sum4  = 0.0;
119966ed3db0SBarry Smith     sum5  = 0.0;
120066ed3db0SBarry Smith     sum6  = 0.0;
120166ed3db0SBarry Smith     sum7  = 0.0;
120266ed3db0SBarry Smith     sum8  = 0.0;
120326fbe8dcSKarl Rupp 
1204b3c51c66SMatthew Knepley     nonzerorow += (n>0);
120566ed3db0SBarry Smith     for (j=0; j<n; j++) {
120666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
120766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
120866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
120966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
121066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
121166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
121266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
121366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
121466ed3db0SBarry Smith       jrow++;
121566ed3db0SBarry Smith     }
121666ed3db0SBarry Smith     y[8*i]   = sum1;
121766ed3db0SBarry Smith     y[8*i+1] = sum2;
121866ed3db0SBarry Smith     y[8*i+2] = sum3;
121966ed3db0SBarry Smith     y[8*i+3] = sum4;
122066ed3db0SBarry Smith     y[8*i+4] = sum5;
122166ed3db0SBarry Smith     y[8*i+5] = sum6;
122266ed3db0SBarry Smith     y[8*i+6] = sum7;
122366ed3db0SBarry Smith     y[8*i+7] = sum8;
122466ed3db0SBarry Smith   }
122566ed3db0SBarry Smith 
12269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow));
12279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
12289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
122966ed3db0SBarry Smith   PetscFunctionReturn(0);
123066ed3db0SBarry Smith }
123166ed3db0SBarry Smith 
1232dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
123366ed3db0SBarry Smith {
123466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
123566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1236d2840e09SBarry Smith   const PetscScalar *x,*v;
1237d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1238d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1239d2840e09SBarry Smith   PetscInt          n,i;
124066ed3db0SBarry Smith 
124166ed3db0SBarry Smith   PetscFunctionBegin;
12429566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
12439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
12449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1245bfec09a0SHong Zhang 
124666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1247bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1248bfec09a0SHong Zhang     v      = a->a + a->i[i];
124966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
125066ed3db0SBarry Smith     alpha1 = x[8*i];
125166ed3db0SBarry Smith     alpha2 = x[8*i+1];
125266ed3db0SBarry Smith     alpha3 = x[8*i+2];
125366ed3db0SBarry Smith     alpha4 = x[8*i+3];
125466ed3db0SBarry Smith     alpha5 = x[8*i+4];
125566ed3db0SBarry Smith     alpha6 = x[8*i+5];
125666ed3db0SBarry Smith     alpha7 = x[8*i+6];
125766ed3db0SBarry Smith     alpha8 = x[8*i+7];
125866ed3db0SBarry Smith     while (n-->0) {
125966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
126066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
126166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
126266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
126366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
126466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
126566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
126666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
126766ed3db0SBarry Smith       idx++; v++;
126866ed3db0SBarry Smith     }
126966ed3db0SBarry Smith   }
12709566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
12719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
12729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
127366ed3db0SBarry Smith   PetscFunctionReturn(0);
127466ed3db0SBarry Smith }
127566ed3db0SBarry Smith 
1276dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
127766ed3db0SBarry Smith {
127866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
127966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1280d2840e09SBarry Smith   const PetscScalar *x,*v;
1281d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1282d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1283b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
128466ed3db0SBarry Smith 
128566ed3db0SBarry Smith   PetscFunctionBegin;
12869566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
12879566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
12889566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
128966ed3db0SBarry Smith   idx  = a->j;
129066ed3db0SBarry Smith   v    = a->a;
129166ed3db0SBarry Smith   ii   = a->i;
129266ed3db0SBarry Smith 
129366ed3db0SBarry Smith   for (i=0; i<m; i++) {
129466ed3db0SBarry Smith     jrow = ii[i];
129566ed3db0SBarry Smith     n    = ii[i+1] - jrow;
129666ed3db0SBarry Smith     sum1 = 0.0;
129766ed3db0SBarry Smith     sum2 = 0.0;
129866ed3db0SBarry Smith     sum3 = 0.0;
129966ed3db0SBarry Smith     sum4 = 0.0;
130066ed3db0SBarry Smith     sum5 = 0.0;
130166ed3db0SBarry Smith     sum6 = 0.0;
130266ed3db0SBarry Smith     sum7 = 0.0;
130366ed3db0SBarry Smith     sum8 = 0.0;
130466ed3db0SBarry Smith     for (j=0; j<n; j++) {
130566ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
130666ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
130766ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
130866ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
130966ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
131066ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
131166ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
131266ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
131366ed3db0SBarry Smith       jrow++;
131466ed3db0SBarry Smith     }
131566ed3db0SBarry Smith     y[8*i]   += sum1;
131666ed3db0SBarry Smith     y[8*i+1] += sum2;
131766ed3db0SBarry Smith     y[8*i+2] += sum3;
131866ed3db0SBarry Smith     y[8*i+3] += sum4;
131966ed3db0SBarry Smith     y[8*i+4] += sum5;
132066ed3db0SBarry Smith     y[8*i+5] += sum6;
132166ed3db0SBarry Smith     y[8*i+6] += sum7;
132266ed3db0SBarry Smith     y[8*i+7] += sum8;
132366ed3db0SBarry Smith   }
132466ed3db0SBarry Smith 
13259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
13269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
13279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
132866ed3db0SBarry Smith   PetscFunctionReturn(0);
132966ed3db0SBarry Smith }
133066ed3db0SBarry Smith 
1331dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
133266ed3db0SBarry Smith {
133366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
133466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1335d2840e09SBarry Smith   const PetscScalar *x,*v;
1336d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1337d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1338d2840e09SBarry Smith   PetscInt          n,i;
133966ed3db0SBarry Smith 
134066ed3db0SBarry Smith   PetscFunctionBegin;
13419566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
13429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
13439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
134466ed3db0SBarry Smith   for (i=0; i<m; i++) {
1345bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1346bfec09a0SHong Zhang     v      = a->a + a->i[i];
134766ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
134866ed3db0SBarry Smith     alpha1 = x[8*i];
134966ed3db0SBarry Smith     alpha2 = x[8*i+1];
135066ed3db0SBarry Smith     alpha3 = x[8*i+2];
135166ed3db0SBarry Smith     alpha4 = x[8*i+3];
135266ed3db0SBarry Smith     alpha5 = x[8*i+4];
135366ed3db0SBarry Smith     alpha6 = x[8*i+5];
135466ed3db0SBarry Smith     alpha7 = x[8*i+6];
135566ed3db0SBarry Smith     alpha8 = x[8*i+7];
135666ed3db0SBarry Smith     while (n-->0) {
135766ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
135866ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
135966ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
136066ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
136166ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
136266ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
136366ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
136466ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
136566ed3db0SBarry Smith       idx++; v++;
136666ed3db0SBarry Smith     }
136766ed3db0SBarry Smith   }
13689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
13699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
13709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
13712f7816d4SBarry Smith   PetscFunctionReturn(0);
13722f7816d4SBarry Smith }
13732f7816d4SBarry Smith 
13742b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1375dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
13762b692628SMatthew Knepley {
13772b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
13782b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1379d2840e09SBarry Smith   const PetscScalar *x,*v;
1380d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1381d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1382d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
13832b692628SMatthew Knepley 
13842b692628SMatthew Knepley   PetscFunctionBegin;
13859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
13869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
13872b692628SMatthew Knepley   idx  = a->j;
13882b692628SMatthew Knepley   v    = a->a;
13892b692628SMatthew Knepley   ii   = a->i;
13902b692628SMatthew Knepley 
13912b692628SMatthew Knepley   for (i=0; i<m; i++) {
13922b692628SMatthew Knepley     jrow  = ii[i];
13932b692628SMatthew Knepley     n     = ii[i+1] - jrow;
13942b692628SMatthew Knepley     sum1  = 0.0;
13952b692628SMatthew Knepley     sum2  = 0.0;
13962b692628SMatthew Knepley     sum3  = 0.0;
13972b692628SMatthew Knepley     sum4  = 0.0;
13982b692628SMatthew Knepley     sum5  = 0.0;
13992b692628SMatthew Knepley     sum6  = 0.0;
14002b692628SMatthew Knepley     sum7  = 0.0;
14012b692628SMatthew Knepley     sum8  = 0.0;
14022b692628SMatthew Knepley     sum9  = 0.0;
140326fbe8dcSKarl Rupp 
1404b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14052b692628SMatthew Knepley     for (j=0; j<n; j++) {
14062b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14072b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14082b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14092b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14102b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14112b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14122b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14132b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14142b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14152b692628SMatthew Knepley       jrow++;
14162b692628SMatthew Knepley     }
14172b692628SMatthew Knepley     y[9*i]   = sum1;
14182b692628SMatthew Knepley     y[9*i+1] = sum2;
14192b692628SMatthew Knepley     y[9*i+2] = sum3;
14202b692628SMatthew Knepley     y[9*i+3] = sum4;
14212b692628SMatthew Knepley     y[9*i+4] = sum5;
14222b692628SMatthew Knepley     y[9*i+5] = sum6;
14232b692628SMatthew Knepley     y[9*i+6] = sum7;
14242b692628SMatthew Knepley     y[9*i+7] = sum8;
14252b692628SMatthew Knepley     y[9*i+8] = sum9;
14262b692628SMatthew Knepley   }
14272b692628SMatthew Knepley 
14289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow));
14299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
14309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
14312b692628SMatthew Knepley   PetscFunctionReturn(0);
14322b692628SMatthew Knepley }
14332b692628SMatthew Knepley 
1434b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1435b9cda22cSBarry Smith 
1436dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14372b692628SMatthew Knepley {
14382b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14392b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1440d2840e09SBarry Smith   const PetscScalar *x,*v;
1441d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1442d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1443d2840e09SBarry Smith   PetscInt          n,i;
14442b692628SMatthew Knepley 
14452b692628SMatthew Knepley   PetscFunctionBegin;
14469566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
14479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
14489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
14492b692628SMatthew Knepley 
14502b692628SMatthew Knepley   for (i=0; i<m; i++) {
14512b692628SMatthew Knepley     idx    = a->j + a->i[i];
14522b692628SMatthew Knepley     v      = a->a + a->i[i];
14532b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14542b692628SMatthew Knepley     alpha1 = x[9*i];
14552b692628SMatthew Knepley     alpha2 = x[9*i+1];
14562b692628SMatthew Knepley     alpha3 = x[9*i+2];
14572b692628SMatthew Knepley     alpha4 = x[9*i+3];
14582b692628SMatthew Knepley     alpha5 = x[9*i+4];
14592b692628SMatthew Knepley     alpha6 = x[9*i+5];
14602b692628SMatthew Knepley     alpha7 = x[9*i+6];
14612b692628SMatthew Knepley     alpha8 = x[9*i+7];
14622f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14632b692628SMatthew Knepley     while (n-->0) {
14642b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14652b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14662b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
14672b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
14682b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
14692b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
14702b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
14712b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
14722b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
14732b692628SMatthew Knepley       idx++; v++;
14742b692628SMatthew Knepley     }
14752b692628SMatthew Knepley   }
14769566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
14779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
14789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
14792b692628SMatthew Knepley   PetscFunctionReturn(0);
14802b692628SMatthew Knepley }
14812b692628SMatthew Knepley 
1482dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
14832b692628SMatthew Knepley {
14842b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14852b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1486d2840e09SBarry Smith   const PetscScalar *x,*v;
1487d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1489b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
14902b692628SMatthew Knepley 
14912b692628SMatthew Knepley   PetscFunctionBegin;
14929566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
14939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
14949566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
14952b692628SMatthew Knepley   idx  = a->j;
14962b692628SMatthew Knepley   v    = a->a;
14972b692628SMatthew Knepley   ii   = a->i;
14982b692628SMatthew Knepley 
14992b692628SMatthew Knepley   for (i=0; i<m; i++) {
15002b692628SMatthew Knepley     jrow = ii[i];
15012b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15022b692628SMatthew Knepley     sum1 = 0.0;
15032b692628SMatthew Knepley     sum2 = 0.0;
15042b692628SMatthew Knepley     sum3 = 0.0;
15052b692628SMatthew Knepley     sum4 = 0.0;
15062b692628SMatthew Knepley     sum5 = 0.0;
15072b692628SMatthew Knepley     sum6 = 0.0;
15082b692628SMatthew Knepley     sum7 = 0.0;
15092b692628SMatthew Knepley     sum8 = 0.0;
15102b692628SMatthew Knepley     sum9 = 0.0;
15112b692628SMatthew Knepley     for (j=0; j<n; j++) {
15122b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15132b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15142b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15152b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15162b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15172b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15182b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15192b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15202b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15212b692628SMatthew Knepley       jrow++;
15222b692628SMatthew Knepley     }
15232b692628SMatthew Knepley     y[9*i]   += sum1;
15242b692628SMatthew Knepley     y[9*i+1] += sum2;
15252b692628SMatthew Knepley     y[9*i+2] += sum3;
15262b692628SMatthew Knepley     y[9*i+3] += sum4;
15272b692628SMatthew Knepley     y[9*i+4] += sum5;
15282b692628SMatthew Knepley     y[9*i+5] += sum6;
15292b692628SMatthew Knepley     y[9*i+6] += sum7;
15302b692628SMatthew Knepley     y[9*i+7] += sum8;
15312b692628SMatthew Knepley     y[9*i+8] += sum9;
15322b692628SMatthew Knepley   }
15332b692628SMatthew Knepley 
15349566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
15359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
15369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
15372b692628SMatthew Knepley   PetscFunctionReturn(0);
15382b692628SMatthew Knepley }
15392b692628SMatthew Knepley 
1540dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15412b692628SMatthew Knepley {
15422b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15432b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1544d2840e09SBarry Smith   const PetscScalar *x,*v;
1545d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1546d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1547d2840e09SBarry Smith   PetscInt          n,i;
15482b692628SMatthew Knepley 
15492b692628SMatthew Knepley   PetscFunctionBegin;
15509566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
15519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
15529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
15532b692628SMatthew Knepley   for (i=0; i<m; i++) {
15542b692628SMatthew Knepley     idx    = a->j + a->i[i];
15552b692628SMatthew Knepley     v      = a->a + a->i[i];
15562b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15572b692628SMatthew Knepley     alpha1 = x[9*i];
15582b692628SMatthew Knepley     alpha2 = x[9*i+1];
15592b692628SMatthew Knepley     alpha3 = x[9*i+2];
15602b692628SMatthew Knepley     alpha4 = x[9*i+3];
15612b692628SMatthew Knepley     alpha5 = x[9*i+4];
15622b692628SMatthew Knepley     alpha6 = x[9*i+5];
15632b692628SMatthew Knepley     alpha7 = x[9*i+6];
15642b692628SMatthew Knepley     alpha8 = x[9*i+7];
15652b692628SMatthew Knepley     alpha9 = x[9*i+8];
15662b692628SMatthew Knepley     while (n-->0) {
15672b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15682b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15692b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15702b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15712b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15722b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15732b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15742b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15752b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15762b692628SMatthew Knepley       idx++; v++;
15772b692628SMatthew Knepley     }
15782b692628SMatthew Knepley   }
15799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
15809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
15819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
15822b692628SMatthew Knepley   PetscFunctionReturn(0);
15832b692628SMatthew Knepley }
1584b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1585b9cda22cSBarry Smith {
1586b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1587b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1588d2840e09SBarry Smith   const PetscScalar *x,*v;
1589d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1590d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1591d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1592b9cda22cSBarry Smith 
1593b9cda22cSBarry Smith   PetscFunctionBegin;
15949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
15959566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1596b9cda22cSBarry Smith   idx  = a->j;
1597b9cda22cSBarry Smith   v    = a->a;
1598b9cda22cSBarry Smith   ii   = a->i;
1599b9cda22cSBarry Smith 
1600b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1601b9cda22cSBarry Smith     jrow  = ii[i];
1602b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1603b9cda22cSBarry Smith     sum1  = 0.0;
1604b9cda22cSBarry Smith     sum2  = 0.0;
1605b9cda22cSBarry Smith     sum3  = 0.0;
1606b9cda22cSBarry Smith     sum4  = 0.0;
1607b9cda22cSBarry Smith     sum5  = 0.0;
1608b9cda22cSBarry Smith     sum6  = 0.0;
1609b9cda22cSBarry Smith     sum7  = 0.0;
1610b9cda22cSBarry Smith     sum8  = 0.0;
1611b9cda22cSBarry Smith     sum9  = 0.0;
1612b9cda22cSBarry Smith     sum10 = 0.0;
161326fbe8dcSKarl Rupp 
1614b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1615b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1616b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1617b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1618b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1619b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1620b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1621b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1622b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1623b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1624b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1625b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1626b9cda22cSBarry Smith       jrow++;
1627b9cda22cSBarry Smith     }
1628b9cda22cSBarry Smith     y[10*i]   = sum1;
1629b9cda22cSBarry Smith     y[10*i+1] = sum2;
1630b9cda22cSBarry Smith     y[10*i+2] = sum3;
1631b9cda22cSBarry Smith     y[10*i+3] = sum4;
1632b9cda22cSBarry Smith     y[10*i+4] = sum5;
1633b9cda22cSBarry Smith     y[10*i+5] = sum6;
1634b9cda22cSBarry Smith     y[10*i+6] = sum7;
1635b9cda22cSBarry Smith     y[10*i+7] = sum8;
1636b9cda22cSBarry Smith     y[10*i+8] = sum9;
1637b9cda22cSBarry Smith     y[10*i+9] = sum10;
1638b9cda22cSBarry Smith   }
1639b9cda22cSBarry Smith 
16409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow));
16419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
16429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1643b9cda22cSBarry Smith   PetscFunctionReturn(0);
1644b9cda22cSBarry Smith }
1645b9cda22cSBarry Smith 
1646b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1647b9cda22cSBarry Smith {
1648b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1649b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1650d2840e09SBarry Smith   const PetscScalar *x,*v;
1651d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1652d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1653b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1654b9cda22cSBarry Smith 
1655b9cda22cSBarry Smith   PetscFunctionBegin;
16569566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
16579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
16589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1659b9cda22cSBarry Smith   idx  = a->j;
1660b9cda22cSBarry Smith   v    = a->a;
1661b9cda22cSBarry Smith   ii   = a->i;
1662b9cda22cSBarry Smith 
1663b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1664b9cda22cSBarry Smith     jrow  = ii[i];
1665b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1666b9cda22cSBarry Smith     sum1  = 0.0;
1667b9cda22cSBarry Smith     sum2  = 0.0;
1668b9cda22cSBarry Smith     sum3  = 0.0;
1669b9cda22cSBarry Smith     sum4  = 0.0;
1670b9cda22cSBarry Smith     sum5  = 0.0;
1671b9cda22cSBarry Smith     sum6  = 0.0;
1672b9cda22cSBarry Smith     sum7  = 0.0;
1673b9cda22cSBarry Smith     sum8  = 0.0;
1674b9cda22cSBarry Smith     sum9  = 0.0;
1675b9cda22cSBarry Smith     sum10 = 0.0;
1676b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1677b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1678b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1679b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1680b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1681b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1682b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1683b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1684b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1685b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1686b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1687b9cda22cSBarry Smith       jrow++;
1688b9cda22cSBarry Smith     }
1689b9cda22cSBarry Smith     y[10*i]   += sum1;
1690b9cda22cSBarry Smith     y[10*i+1] += sum2;
1691b9cda22cSBarry Smith     y[10*i+2] += sum3;
1692b9cda22cSBarry Smith     y[10*i+3] += sum4;
1693b9cda22cSBarry Smith     y[10*i+4] += sum5;
1694b9cda22cSBarry Smith     y[10*i+5] += sum6;
1695b9cda22cSBarry Smith     y[10*i+6] += sum7;
1696b9cda22cSBarry Smith     y[10*i+7] += sum8;
1697b9cda22cSBarry Smith     y[10*i+8] += sum9;
1698b9cda22cSBarry Smith     y[10*i+9] += sum10;
1699b9cda22cSBarry Smith   }
1700b9cda22cSBarry Smith 
17019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1704b9cda22cSBarry Smith   PetscFunctionReturn(0);
1705b9cda22cSBarry Smith }
1706b9cda22cSBarry Smith 
1707b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1708b9cda22cSBarry Smith {
1709b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1710b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1711d2840e09SBarry Smith   const PetscScalar *x,*v;
1712d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1713d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1714d2840e09SBarry Smith   PetscInt          n,i;
1715b9cda22cSBarry Smith 
1716b9cda22cSBarry Smith   PetscFunctionBegin;
17179566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
17189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
17199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1720b9cda22cSBarry Smith 
1721b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1722b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1723b9cda22cSBarry Smith     v       = a->a + a->i[i];
1724b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1725e29fdc22SBarry Smith     alpha1  = x[10*i];
1726e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1727e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1728e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1729e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1730e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1731e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1732e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1733e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1734b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1735b9cda22cSBarry Smith     while (n-->0) {
1736e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1737e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1738e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1739e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1740e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1741e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1742e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1743e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1744e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1745b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1746b9cda22cSBarry Smith       idx++; v++;
1747b9cda22cSBarry Smith     }
1748b9cda22cSBarry Smith   }
17499566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1752b9cda22cSBarry Smith   PetscFunctionReturn(0);
1753b9cda22cSBarry Smith }
1754b9cda22cSBarry Smith 
1755b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1756b9cda22cSBarry Smith {
1757b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1758b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1759d2840e09SBarry Smith   const PetscScalar *x,*v;
1760d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1761d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1762d2840e09SBarry Smith   PetscInt          n,i;
1763b9cda22cSBarry Smith 
1764b9cda22cSBarry Smith   PetscFunctionBegin;
17659566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
17669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
17679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1768b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1769b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1770b9cda22cSBarry Smith     v       = a->a + a->i[i];
1771b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1772b9cda22cSBarry Smith     alpha1  = x[10*i];
1773b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1774b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1775b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1776b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1777b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1778b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1779b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1780b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1781b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1782b9cda22cSBarry Smith     while (n-->0) {
1783b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1784b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1785b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1786b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1787b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1788b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1789b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1790b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1791b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1792b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1793b9cda22cSBarry Smith       idx++; v++;
1794b9cda22cSBarry Smith     }
1795b9cda22cSBarry Smith   }
17969566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1799b9cda22cSBarry Smith   PetscFunctionReturn(0);
1800b9cda22cSBarry Smith }
1801b9cda22cSBarry Smith 
18022f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1803dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1804dbdd7285SBarry Smith {
1805dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1806dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1807d2840e09SBarry Smith   const PetscScalar *x,*v;
1808d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1809d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1810d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1811dbdd7285SBarry Smith 
1812dbdd7285SBarry Smith   PetscFunctionBegin;
18139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
18149566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1815dbdd7285SBarry Smith   idx  = a->j;
1816dbdd7285SBarry Smith   v    = a->a;
1817dbdd7285SBarry Smith   ii   = a->i;
1818dbdd7285SBarry Smith 
1819dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1820dbdd7285SBarry Smith     jrow  = ii[i];
1821dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1822dbdd7285SBarry Smith     sum1  = 0.0;
1823dbdd7285SBarry Smith     sum2  = 0.0;
1824dbdd7285SBarry Smith     sum3  = 0.0;
1825dbdd7285SBarry Smith     sum4  = 0.0;
1826dbdd7285SBarry Smith     sum5  = 0.0;
1827dbdd7285SBarry Smith     sum6  = 0.0;
1828dbdd7285SBarry Smith     sum7  = 0.0;
1829dbdd7285SBarry Smith     sum8  = 0.0;
1830dbdd7285SBarry Smith     sum9  = 0.0;
1831dbdd7285SBarry Smith     sum10 = 0.0;
1832dbdd7285SBarry Smith     sum11 = 0.0;
183326fbe8dcSKarl Rupp 
1834dbdd7285SBarry Smith     nonzerorow += (n>0);
1835dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1836dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1837dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1838dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1839dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1840dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1841dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1842dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1843dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1844dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1845dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1846dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1847dbdd7285SBarry Smith       jrow++;
1848dbdd7285SBarry Smith     }
1849dbdd7285SBarry Smith     y[11*i]    = sum1;
1850dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1851dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1852dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1853dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1854dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1855dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1856dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1857dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1858dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1859dbdd7285SBarry Smith     y[11*i+10] = sum11;
1860dbdd7285SBarry Smith   }
1861dbdd7285SBarry Smith 
18629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow));
18639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
18649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1865dbdd7285SBarry Smith   PetscFunctionReturn(0);
1866dbdd7285SBarry Smith }
1867dbdd7285SBarry Smith 
1868dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1869dbdd7285SBarry Smith {
1870dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1871dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1872d2840e09SBarry Smith   const PetscScalar *x,*v;
1873d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1874d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1875dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1876dbdd7285SBarry Smith 
1877dbdd7285SBarry Smith   PetscFunctionBegin;
18789566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
18799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
18809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1881dbdd7285SBarry Smith   idx  = a->j;
1882dbdd7285SBarry Smith   v    = a->a;
1883dbdd7285SBarry Smith   ii   = a->i;
1884dbdd7285SBarry Smith 
1885dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1886dbdd7285SBarry Smith     jrow  = ii[i];
1887dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1888dbdd7285SBarry Smith     sum1  = 0.0;
1889dbdd7285SBarry Smith     sum2  = 0.0;
1890dbdd7285SBarry Smith     sum3  = 0.0;
1891dbdd7285SBarry Smith     sum4  = 0.0;
1892dbdd7285SBarry Smith     sum5  = 0.0;
1893dbdd7285SBarry Smith     sum6  = 0.0;
1894dbdd7285SBarry Smith     sum7  = 0.0;
1895dbdd7285SBarry Smith     sum8  = 0.0;
1896dbdd7285SBarry Smith     sum9  = 0.0;
1897dbdd7285SBarry Smith     sum10 = 0.0;
1898dbdd7285SBarry Smith     sum11 = 0.0;
1899dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1900dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1901dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1902dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1903dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1904dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1905dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1906dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1907dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1908dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1909dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1910dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1911dbdd7285SBarry Smith       jrow++;
1912dbdd7285SBarry Smith     }
1913dbdd7285SBarry Smith     y[11*i]    += sum1;
1914dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1915dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1916dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1917dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1918dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1919dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1920dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1921dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1922dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1923dbdd7285SBarry Smith     y[11*i+10] += sum11;
1924dbdd7285SBarry Smith   }
1925dbdd7285SBarry Smith 
19269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
19279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
19289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1929dbdd7285SBarry Smith   PetscFunctionReturn(0);
1930dbdd7285SBarry Smith }
1931dbdd7285SBarry Smith 
1932dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933dbdd7285SBarry Smith {
1934dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1935dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1936d2840e09SBarry Smith   const PetscScalar *x,*v;
1937d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1938d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1939d2840e09SBarry Smith   PetscInt          n,i;
1940dbdd7285SBarry Smith 
1941dbdd7285SBarry Smith   PetscFunctionBegin;
19429566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
19439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
19449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1945dbdd7285SBarry Smith 
1946dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1947dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1948dbdd7285SBarry Smith     v       = a->a + a->i[i];
1949dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1950dbdd7285SBarry Smith     alpha1  = x[11*i];
1951dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1952dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1953dbdd7285SBarry Smith     alpha4  = x[11*i+3];
1954dbdd7285SBarry Smith     alpha5  = x[11*i+4];
1955dbdd7285SBarry Smith     alpha6  = x[11*i+5];
1956dbdd7285SBarry Smith     alpha7  = x[11*i+6];
1957dbdd7285SBarry Smith     alpha8  = x[11*i+7];
1958dbdd7285SBarry Smith     alpha9  = x[11*i+8];
1959dbdd7285SBarry Smith     alpha10 = x[11*i+9];
1960dbdd7285SBarry Smith     alpha11 = x[11*i+10];
1961dbdd7285SBarry Smith     while (n-->0) {
1962dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
1963dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
1964dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
1965dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
1966dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
1967dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
1968dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
1969dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
1970dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
1971dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
1972dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
1973dbdd7285SBarry Smith       idx++; v++;
1974dbdd7285SBarry Smith     }
1975dbdd7285SBarry Smith   }
19769566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
19779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
19789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1979dbdd7285SBarry Smith   PetscFunctionReturn(0);
1980dbdd7285SBarry Smith }
1981dbdd7285SBarry Smith 
1982dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983dbdd7285SBarry Smith {
1984dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986d2840e09SBarry Smith   const PetscScalar *x,*v;
1987d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1988d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1989d2840e09SBarry Smith   PetscInt          n,i;
1990dbdd7285SBarry Smith 
1991dbdd7285SBarry Smith   PetscFunctionBegin;
19929566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
19939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
19949566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1995dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1996dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1997dbdd7285SBarry Smith     v       = a->a + a->i[i];
1998dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1999dbdd7285SBarry Smith     alpha1  = x[11*i];
2000dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2001dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2002dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2003dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2004dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2005dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2006dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2007dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2008dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2009dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2010dbdd7285SBarry Smith     while (n-->0) {
2011dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2017dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2018dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2019dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2020dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2021dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2022dbdd7285SBarry Smith       idx++; v++;
2023dbdd7285SBarry Smith     }
2024dbdd7285SBarry Smith   }
20259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
20269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
20279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2028dbdd7285SBarry Smith   PetscFunctionReturn(0);
2029dbdd7285SBarry Smith }
2030dbdd7285SBarry Smith 
2031dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2032dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20332f7816d4SBarry Smith {
20342f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20352f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2036d2840e09SBarry Smith   const PetscScalar *x,*v;
2037d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20382f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2039d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2040d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20412f7816d4SBarry Smith 
20422f7816d4SBarry Smith   PetscFunctionBegin;
20439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
20449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
20452f7816d4SBarry Smith   idx  = a->j;
20462f7816d4SBarry Smith   v    = a->a;
20472f7816d4SBarry Smith   ii   = a->i;
20482f7816d4SBarry Smith 
20492f7816d4SBarry Smith   for (i=0; i<m; i++) {
20502f7816d4SBarry Smith     jrow  = ii[i];
20512f7816d4SBarry Smith     n     = ii[i+1] - jrow;
20522f7816d4SBarry Smith     sum1  = 0.0;
20532f7816d4SBarry Smith     sum2  = 0.0;
20542f7816d4SBarry Smith     sum3  = 0.0;
20552f7816d4SBarry Smith     sum4  = 0.0;
20562f7816d4SBarry Smith     sum5  = 0.0;
20572f7816d4SBarry Smith     sum6  = 0.0;
20582f7816d4SBarry Smith     sum7  = 0.0;
20592f7816d4SBarry Smith     sum8  = 0.0;
20602f7816d4SBarry Smith     sum9  = 0.0;
20612f7816d4SBarry Smith     sum10 = 0.0;
20622f7816d4SBarry Smith     sum11 = 0.0;
20632f7816d4SBarry Smith     sum12 = 0.0;
20642f7816d4SBarry Smith     sum13 = 0.0;
20652f7816d4SBarry Smith     sum14 = 0.0;
20662f7816d4SBarry Smith     sum15 = 0.0;
20672f7816d4SBarry Smith     sum16 = 0.0;
206826fbe8dcSKarl Rupp 
2069b3c51c66SMatthew Knepley     nonzerorow += (n>0);
20702f7816d4SBarry Smith     for (j=0; j<n; j++) {
20712f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
20722f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
20732f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
20742f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
20752f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
20762f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
20772f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
20782f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
20792f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
20802f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
20812f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
20822f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
20832f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
20842f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
20852f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
20862f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
20872f7816d4SBarry Smith       jrow++;
20882f7816d4SBarry Smith     }
20892f7816d4SBarry Smith     y[16*i]    = sum1;
20902f7816d4SBarry Smith     y[16*i+1]  = sum2;
20912f7816d4SBarry Smith     y[16*i+2]  = sum3;
20922f7816d4SBarry Smith     y[16*i+3]  = sum4;
20932f7816d4SBarry Smith     y[16*i+4]  = sum5;
20942f7816d4SBarry Smith     y[16*i+5]  = sum6;
20952f7816d4SBarry Smith     y[16*i+6]  = sum7;
20962f7816d4SBarry Smith     y[16*i+7]  = sum8;
20972f7816d4SBarry Smith     y[16*i+8]  = sum9;
20982f7816d4SBarry Smith     y[16*i+9]  = sum10;
20992f7816d4SBarry Smith     y[16*i+10] = sum11;
21002f7816d4SBarry Smith     y[16*i+11] = sum12;
21012f7816d4SBarry Smith     y[16*i+12] = sum13;
21022f7816d4SBarry Smith     y[16*i+13] = sum14;
21032f7816d4SBarry Smith     y[16*i+14] = sum15;
21042f7816d4SBarry Smith     y[16*i+15] = sum16;
21052f7816d4SBarry Smith   }
21062f7816d4SBarry Smith 
21079566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow));
21089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
21099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
21102f7816d4SBarry Smith   PetscFunctionReturn(0);
21112f7816d4SBarry Smith }
21122f7816d4SBarry Smith 
2113dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21142f7816d4SBarry Smith {
21152f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21162f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2117d2840e09SBarry Smith   const PetscScalar *x,*v;
2118d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21192f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2120d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2121d2840e09SBarry Smith   PetscInt          n,i;
21222f7816d4SBarry Smith 
21232f7816d4SBarry Smith   PetscFunctionBegin;
21249566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
21259566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
21269566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2127bfec09a0SHong Zhang 
21282f7816d4SBarry Smith   for (i=0; i<m; i++) {
2129bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2130bfec09a0SHong Zhang     v       = a->a + a->i[i];
21312f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21322f7816d4SBarry Smith     alpha1  = x[16*i];
21332f7816d4SBarry Smith     alpha2  = x[16*i+1];
21342f7816d4SBarry Smith     alpha3  = x[16*i+2];
21352f7816d4SBarry Smith     alpha4  = x[16*i+3];
21362f7816d4SBarry Smith     alpha5  = x[16*i+4];
21372f7816d4SBarry Smith     alpha6  = x[16*i+5];
21382f7816d4SBarry Smith     alpha7  = x[16*i+6];
21392f7816d4SBarry Smith     alpha8  = x[16*i+7];
21402f7816d4SBarry Smith     alpha9  = x[16*i+8];
21412f7816d4SBarry Smith     alpha10 = x[16*i+9];
21422f7816d4SBarry Smith     alpha11 = x[16*i+10];
21432f7816d4SBarry Smith     alpha12 = x[16*i+11];
21442f7816d4SBarry Smith     alpha13 = x[16*i+12];
21452f7816d4SBarry Smith     alpha14 = x[16*i+13];
21462f7816d4SBarry Smith     alpha15 = x[16*i+14];
21472f7816d4SBarry Smith     alpha16 = x[16*i+15];
21482f7816d4SBarry Smith     while (n-->0) {
21492f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
21502f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
21512f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
21522f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
21532f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
21542f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
21552f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
21562f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
21572f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
21582f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
21592f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
21602f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
21612f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
21622f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
21632f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
21642f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
21652f7816d4SBarry Smith       idx++; v++;
21662f7816d4SBarry Smith     }
21672f7816d4SBarry Smith   }
21689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
21699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
21709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
21712f7816d4SBarry Smith   PetscFunctionReturn(0);
21722f7816d4SBarry Smith }
21732f7816d4SBarry Smith 
2174dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
21752f7816d4SBarry Smith {
21762f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21772f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2178d2840e09SBarry Smith   const PetscScalar *x,*v;
2179d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21802f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2182b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
21832f7816d4SBarry Smith 
21842f7816d4SBarry Smith   PetscFunctionBegin;
21859566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
21869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
21879566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
21882f7816d4SBarry Smith   idx  = a->j;
21892f7816d4SBarry Smith   v    = a->a;
21902f7816d4SBarry Smith   ii   = a->i;
21912f7816d4SBarry Smith 
21922f7816d4SBarry Smith   for (i=0; i<m; i++) {
21932f7816d4SBarry Smith     jrow  = ii[i];
21942f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21952f7816d4SBarry Smith     sum1  = 0.0;
21962f7816d4SBarry Smith     sum2  = 0.0;
21972f7816d4SBarry Smith     sum3  = 0.0;
21982f7816d4SBarry Smith     sum4  = 0.0;
21992f7816d4SBarry Smith     sum5  = 0.0;
22002f7816d4SBarry Smith     sum6  = 0.0;
22012f7816d4SBarry Smith     sum7  = 0.0;
22022f7816d4SBarry Smith     sum8  = 0.0;
22032f7816d4SBarry Smith     sum9  = 0.0;
22042f7816d4SBarry Smith     sum10 = 0.0;
22052f7816d4SBarry Smith     sum11 = 0.0;
22062f7816d4SBarry Smith     sum12 = 0.0;
22072f7816d4SBarry Smith     sum13 = 0.0;
22082f7816d4SBarry Smith     sum14 = 0.0;
22092f7816d4SBarry Smith     sum15 = 0.0;
22102f7816d4SBarry Smith     sum16 = 0.0;
22112f7816d4SBarry Smith     for (j=0; j<n; j++) {
22122f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22132f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22142f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22152f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22162f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22172f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22182f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22192f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22202f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22212f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22222f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22232f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22242f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22252f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22262f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22272f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22282f7816d4SBarry Smith       jrow++;
22292f7816d4SBarry Smith     }
22302f7816d4SBarry Smith     y[16*i]    += sum1;
22312f7816d4SBarry Smith     y[16*i+1]  += sum2;
22322f7816d4SBarry Smith     y[16*i+2]  += sum3;
22332f7816d4SBarry Smith     y[16*i+3]  += sum4;
22342f7816d4SBarry Smith     y[16*i+4]  += sum5;
22352f7816d4SBarry Smith     y[16*i+5]  += sum6;
22362f7816d4SBarry Smith     y[16*i+6]  += sum7;
22372f7816d4SBarry Smith     y[16*i+7]  += sum8;
22382f7816d4SBarry Smith     y[16*i+8]  += sum9;
22392f7816d4SBarry Smith     y[16*i+9]  += sum10;
22402f7816d4SBarry Smith     y[16*i+10] += sum11;
22412f7816d4SBarry Smith     y[16*i+11] += sum12;
22422f7816d4SBarry Smith     y[16*i+12] += sum13;
22432f7816d4SBarry Smith     y[16*i+13] += sum14;
22442f7816d4SBarry Smith     y[16*i+14] += sum15;
22452f7816d4SBarry Smith     y[16*i+15] += sum16;
22462f7816d4SBarry Smith   }
22472f7816d4SBarry Smith 
22489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
22499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
22509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
22512f7816d4SBarry Smith   PetscFunctionReturn(0);
22522f7816d4SBarry Smith }
22532f7816d4SBarry Smith 
2254dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22552f7816d4SBarry Smith {
22562f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22572f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2258d2840e09SBarry Smith   const PetscScalar *x,*v;
2259d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22602f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2261d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2262d2840e09SBarry Smith   PetscInt          n,i;
22632f7816d4SBarry Smith 
22642f7816d4SBarry Smith   PetscFunctionBegin;
22659566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
22669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
22679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
22682f7816d4SBarry Smith   for (i=0; i<m; i++) {
2269bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2270bfec09a0SHong Zhang     v       = a->a + a->i[i];
22712f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
22722f7816d4SBarry Smith     alpha1  = x[16*i];
22732f7816d4SBarry Smith     alpha2  = x[16*i+1];
22742f7816d4SBarry Smith     alpha3  = x[16*i+2];
22752f7816d4SBarry Smith     alpha4  = x[16*i+3];
22762f7816d4SBarry Smith     alpha5  = x[16*i+4];
22772f7816d4SBarry Smith     alpha6  = x[16*i+5];
22782f7816d4SBarry Smith     alpha7  = x[16*i+6];
22792f7816d4SBarry Smith     alpha8  = x[16*i+7];
22802f7816d4SBarry Smith     alpha9  = x[16*i+8];
22812f7816d4SBarry Smith     alpha10 = x[16*i+9];
22822f7816d4SBarry Smith     alpha11 = x[16*i+10];
22832f7816d4SBarry Smith     alpha12 = x[16*i+11];
22842f7816d4SBarry Smith     alpha13 = x[16*i+12];
22852f7816d4SBarry Smith     alpha14 = x[16*i+13];
22862f7816d4SBarry Smith     alpha15 = x[16*i+14];
22872f7816d4SBarry Smith     alpha16 = x[16*i+15];
22882f7816d4SBarry Smith     while (n-->0) {
22892f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22902f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22912f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22922f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22932f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22942f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22952f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22962f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22972f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22982f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22992f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23002f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23012f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23022f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23032f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23042f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23052f7816d4SBarry Smith       idx++; v++;
23062f7816d4SBarry Smith     }
23072f7816d4SBarry Smith   }
23089566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
23099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
23109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
231166ed3db0SBarry Smith   PetscFunctionReturn(0);
231266ed3db0SBarry Smith }
231366ed3db0SBarry Smith 
2314ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2315ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2316ed1c418cSBarry Smith {
2317ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2318ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2319d2840e09SBarry Smith   const PetscScalar *x,*v;
2320d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2321ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2322d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2323d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2324ed1c418cSBarry Smith 
2325ed1c418cSBarry Smith   PetscFunctionBegin;
23269566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
23279566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2328ed1c418cSBarry Smith   idx  = a->j;
2329ed1c418cSBarry Smith   v    = a->a;
2330ed1c418cSBarry Smith   ii   = a->i;
2331ed1c418cSBarry Smith 
2332ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2333ed1c418cSBarry Smith     jrow  = ii[i];
2334ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2335ed1c418cSBarry Smith     sum1  = 0.0;
2336ed1c418cSBarry Smith     sum2  = 0.0;
2337ed1c418cSBarry Smith     sum3  = 0.0;
2338ed1c418cSBarry Smith     sum4  = 0.0;
2339ed1c418cSBarry Smith     sum5  = 0.0;
2340ed1c418cSBarry Smith     sum6  = 0.0;
2341ed1c418cSBarry Smith     sum7  = 0.0;
2342ed1c418cSBarry Smith     sum8  = 0.0;
2343ed1c418cSBarry Smith     sum9  = 0.0;
2344ed1c418cSBarry Smith     sum10 = 0.0;
2345ed1c418cSBarry Smith     sum11 = 0.0;
2346ed1c418cSBarry Smith     sum12 = 0.0;
2347ed1c418cSBarry Smith     sum13 = 0.0;
2348ed1c418cSBarry Smith     sum14 = 0.0;
2349ed1c418cSBarry Smith     sum15 = 0.0;
2350ed1c418cSBarry Smith     sum16 = 0.0;
2351ed1c418cSBarry Smith     sum17 = 0.0;
2352ed1c418cSBarry Smith     sum18 = 0.0;
235326fbe8dcSKarl Rupp 
2354ed1c418cSBarry Smith     nonzerorow += (n>0);
2355ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2356ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2357ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2358ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2359ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2360ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2361ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2362ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2363ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2364ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2365ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2366ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2367ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2368ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2369ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2370ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2371ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2372ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2373ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2374ed1c418cSBarry Smith       jrow++;
2375ed1c418cSBarry Smith     }
2376ed1c418cSBarry Smith     y[18*i]    = sum1;
2377ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2378ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2379ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2380ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2381ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2382ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2383ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2384ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2385ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2386ed1c418cSBarry Smith     y[18*i+10] = sum11;
2387ed1c418cSBarry Smith     y[18*i+11] = sum12;
2388ed1c418cSBarry Smith     y[18*i+12] = sum13;
2389ed1c418cSBarry Smith     y[18*i+13] = sum14;
2390ed1c418cSBarry Smith     y[18*i+14] = sum15;
2391ed1c418cSBarry Smith     y[18*i+15] = sum16;
2392ed1c418cSBarry Smith     y[18*i+16] = sum17;
2393ed1c418cSBarry Smith     y[18*i+17] = sum18;
2394ed1c418cSBarry Smith   }
2395ed1c418cSBarry Smith 
23969566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow));
23979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
23989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
2399ed1c418cSBarry Smith   PetscFunctionReturn(0);
2400ed1c418cSBarry Smith }
2401ed1c418cSBarry Smith 
2402ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2403ed1c418cSBarry Smith {
2404ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2405ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2406d2840e09SBarry Smith   const PetscScalar *x,*v;
2407d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2408ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2409d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2410d2840e09SBarry Smith   PetscInt          n,i;
2411ed1c418cSBarry Smith 
2412ed1c418cSBarry Smith   PetscFunctionBegin;
24139566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
24149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
24159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2416ed1c418cSBarry Smith 
2417ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2418ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2419ed1c418cSBarry Smith     v       = a->a + a->i[i];
2420ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2421ed1c418cSBarry Smith     alpha1  = x[18*i];
2422ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2423ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2424ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2425ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2426ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2427ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2428ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2429ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2430ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2431ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2432ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2433ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2434ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2435ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2436ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2437ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2438ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2439ed1c418cSBarry Smith     while (n-->0) {
2440ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2441ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2442ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2443ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2444ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2445ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2446ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2447ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2448ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2449ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2450ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2451ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2452ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2453ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2454ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2455ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2456ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2457ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2458ed1c418cSBarry Smith       idx++; v++;
2459ed1c418cSBarry Smith     }
2460ed1c418cSBarry Smith   }
24619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
24629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
24639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
2464ed1c418cSBarry Smith   PetscFunctionReturn(0);
2465ed1c418cSBarry Smith }
2466ed1c418cSBarry Smith 
2467ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2468ed1c418cSBarry Smith {
2469ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2470ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2471d2840e09SBarry Smith   const PetscScalar *x,*v;
2472d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2473ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2474d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2475ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2476ed1c418cSBarry Smith 
2477ed1c418cSBarry Smith   PetscFunctionBegin;
24789566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
24799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
24809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
2481ed1c418cSBarry Smith   idx  = a->j;
2482ed1c418cSBarry Smith   v    = a->a;
2483ed1c418cSBarry Smith   ii   = a->i;
2484ed1c418cSBarry Smith 
2485ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2486ed1c418cSBarry Smith     jrow  = ii[i];
2487ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2488ed1c418cSBarry Smith     sum1  = 0.0;
2489ed1c418cSBarry Smith     sum2  = 0.0;
2490ed1c418cSBarry Smith     sum3  = 0.0;
2491ed1c418cSBarry Smith     sum4  = 0.0;
2492ed1c418cSBarry Smith     sum5  = 0.0;
2493ed1c418cSBarry Smith     sum6  = 0.0;
2494ed1c418cSBarry Smith     sum7  = 0.0;
2495ed1c418cSBarry Smith     sum8  = 0.0;
2496ed1c418cSBarry Smith     sum9  = 0.0;
2497ed1c418cSBarry Smith     sum10 = 0.0;
2498ed1c418cSBarry Smith     sum11 = 0.0;
2499ed1c418cSBarry Smith     sum12 = 0.0;
2500ed1c418cSBarry Smith     sum13 = 0.0;
2501ed1c418cSBarry Smith     sum14 = 0.0;
2502ed1c418cSBarry Smith     sum15 = 0.0;
2503ed1c418cSBarry Smith     sum16 = 0.0;
2504ed1c418cSBarry Smith     sum17 = 0.0;
2505ed1c418cSBarry Smith     sum18 = 0.0;
2506ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2507ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2508ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2509ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2510ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2511ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2512ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2513ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2514ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2515ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2516ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2517ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2518ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2519ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2520ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2521ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2522ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2523ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2524ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2525ed1c418cSBarry Smith       jrow++;
2526ed1c418cSBarry Smith     }
2527ed1c418cSBarry Smith     y[18*i]    += sum1;
2528ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2529ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2530ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2531ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2532ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2533ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2534ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2535ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2536ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2537ed1c418cSBarry Smith     y[18*i+10] += sum11;
2538ed1c418cSBarry Smith     y[18*i+11] += sum12;
2539ed1c418cSBarry Smith     y[18*i+12] += sum13;
2540ed1c418cSBarry Smith     y[18*i+13] += sum14;
2541ed1c418cSBarry Smith     y[18*i+14] += sum15;
2542ed1c418cSBarry Smith     y[18*i+15] += sum16;
2543ed1c418cSBarry Smith     y[18*i+16] += sum17;
2544ed1c418cSBarry Smith     y[18*i+17] += sum18;
2545ed1c418cSBarry Smith   }
2546ed1c418cSBarry Smith 
25479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
25489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
25499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2550ed1c418cSBarry Smith   PetscFunctionReturn(0);
2551ed1c418cSBarry Smith }
2552ed1c418cSBarry Smith 
2553ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2554ed1c418cSBarry Smith {
2555ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2556ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2557d2840e09SBarry Smith   const PetscScalar *x,*v;
2558d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2559ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2560d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2561d2840e09SBarry Smith   PetscInt          n,i;
2562ed1c418cSBarry Smith 
2563ed1c418cSBarry Smith   PetscFunctionBegin;
25649566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
25659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
25669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
2567ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2568ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2569ed1c418cSBarry Smith     v       = a->a + a->i[i];
2570ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2571ed1c418cSBarry Smith     alpha1  = x[18*i];
2572ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2573ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2574ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2575ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2576ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2577ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2578ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2579ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2580ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2581ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2582ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2583ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2584ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2585ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2586ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2587ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2588ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2589ed1c418cSBarry Smith     while (n-->0) {
2590ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2593ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2594ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2595ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2596ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2597ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2598ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2599ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2600ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2601ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2602ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2603ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2604ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2605ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2606ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2607ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2608ed1c418cSBarry Smith       idx++; v++;
2609ed1c418cSBarry Smith     }
2610ed1c418cSBarry Smith   }
26119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
26129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2614ed1c418cSBarry Smith   PetscFunctionReturn(0);
2615ed1c418cSBarry Smith }
2616ed1c418cSBarry Smith 
26176861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26186861f193SBarry Smith {
26196861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26206861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26216861f193SBarry Smith   const PetscScalar *x,*v;
26226861f193SBarry Smith   PetscScalar       *y,*sums;
26236861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26246861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26256861f193SBarry Smith 
26266861f193SBarry Smith   PetscFunctionBegin;
26279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26289566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
26299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
26306861f193SBarry Smith   idx  = a->j;
26316861f193SBarry Smith   v    = a->a;
26326861f193SBarry Smith   ii   = a->i;
26336861f193SBarry Smith 
26346861f193SBarry Smith   for (i=0; i<m; i++) {
26356861f193SBarry Smith     jrow = ii[i];
26366861f193SBarry Smith     n    = ii[i+1] - jrow;
26376861f193SBarry Smith     sums = y + dof*i;
26386861f193SBarry Smith     for (j=0; j<n; j++) {
26396861f193SBarry Smith       for (k=0; k<dof; k++) {
26406861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26416861f193SBarry Smith       }
26426861f193SBarry Smith       jrow++;
26436861f193SBarry Smith     }
26446861f193SBarry Smith   }
26456861f193SBarry Smith 
26469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
26479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
26496861f193SBarry Smith   PetscFunctionReturn(0);
26506861f193SBarry Smith }
26516861f193SBarry Smith 
26526861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
26536861f193SBarry Smith {
26546861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26556861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26566861f193SBarry Smith   const PetscScalar *x,*v;
26576861f193SBarry Smith   PetscScalar       *y,*sums;
26586861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26596861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26606861f193SBarry Smith 
26616861f193SBarry Smith   PetscFunctionBegin;
26629566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
26639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
26656861f193SBarry Smith   idx  = a->j;
26666861f193SBarry Smith   v    = a->a;
26676861f193SBarry Smith   ii   = a->i;
26686861f193SBarry Smith 
26696861f193SBarry Smith   for (i=0; i<m; i++) {
26706861f193SBarry Smith     jrow = ii[i];
26716861f193SBarry Smith     n    = ii[i+1] - jrow;
26726861f193SBarry Smith     sums = y + dof*i;
26736861f193SBarry Smith     for (j=0; j<n; j++) {
26746861f193SBarry Smith       for (k=0; k<dof; k++) {
26756861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26766861f193SBarry Smith       }
26776861f193SBarry Smith       jrow++;
26786861f193SBarry Smith     }
26796861f193SBarry Smith   }
26806861f193SBarry Smith 
26819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
26829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
26846861f193SBarry Smith   PetscFunctionReturn(0);
26856861f193SBarry Smith }
26866861f193SBarry Smith 
26876861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26886861f193SBarry Smith {
26896861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26906861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26916861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
26926861f193SBarry Smith   PetscScalar       *y;
26936861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
26946861f193SBarry Smith   PetscInt          n,i,k;
26956861f193SBarry Smith 
26966861f193SBarry Smith   PetscFunctionBegin;
26979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26989566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
26999566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
27006861f193SBarry Smith   for (i=0; i<m; i++) {
27016861f193SBarry Smith     idx   = a->j + a->i[i];
27026861f193SBarry Smith     v     = a->a + a->i[i];
27036861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27046861f193SBarry Smith     alpha = x + dof*i;
27056861f193SBarry Smith     while (n-->0) {
27066861f193SBarry Smith       for (k=0; k<dof; k++) {
27076861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27086861f193SBarry Smith       }
270983ce7366SBarry Smith       idx++; v++;
27106861f193SBarry Smith     }
27116861f193SBarry Smith   }
27129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
27139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
27149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
27156861f193SBarry Smith   PetscFunctionReturn(0);
27166861f193SBarry Smith }
27176861f193SBarry Smith 
27186861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27196861f193SBarry Smith {
27206861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27216861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27226861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27236861f193SBarry Smith   PetscScalar       *y;
27246861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27256861f193SBarry Smith   PetscInt          n,i,k;
27266861f193SBarry Smith 
27276861f193SBarry Smith   PetscFunctionBegin;
27289566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
27299566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
27309566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
27316861f193SBarry Smith   for (i=0; i<m; i++) {
27326861f193SBarry Smith     idx   = a->j + a->i[i];
27336861f193SBarry Smith     v     = a->a + a->i[i];
27346861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27356861f193SBarry Smith     alpha = x + dof*i;
27366861f193SBarry Smith     while (n-->0) {
27376861f193SBarry Smith       for (k=0; k<dof; k++) {
27386861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27396861f193SBarry Smith       }
274083ce7366SBarry Smith       idx++; v++;
27416861f193SBarry Smith     }
27426861f193SBarry Smith   }
27439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
27449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
27459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
27466861f193SBarry Smith   PetscFunctionReturn(0);
27476861f193SBarry Smith }
27486861f193SBarry Smith 
2749f4a53059SBarry Smith /*===================================================================================*/
2750dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2751f4a53059SBarry Smith {
27524cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2753f4a53059SBarry Smith 
2754b24ad042SBarry Smith   PetscFunctionBegin;
2755f4a53059SBarry Smith   /* start the scatter */
27569566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27579566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy));
27589566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27599566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy));
2760f4a53059SBarry Smith   PetscFunctionReturn(0);
2761f4a53059SBarry Smith }
2762f4a53059SBarry Smith 
2763dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
27644cb9d9b8SBarry Smith {
27654cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2766b24ad042SBarry Smith 
27674cb9d9b8SBarry Smith   PetscFunctionBegin;
27689566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
27699566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy));
27709566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
27719566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
27724cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27734cb9d9b8SBarry Smith }
27744cb9d9b8SBarry Smith 
2775dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
27764cb9d9b8SBarry Smith {
27774cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
27784cb9d9b8SBarry Smith 
2779b24ad042SBarry Smith   PetscFunctionBegin;
27804cb9d9b8SBarry Smith   /* start the scatter */
27819566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27829566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz));
27839566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27849566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
27854cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27864cb9d9b8SBarry Smith }
27874cb9d9b8SBarry Smith 
2788dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
27894cb9d9b8SBarry Smith {
27904cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2791b24ad042SBarry Smith 
27924cb9d9b8SBarry Smith   PetscFunctionBegin;
27939566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
27949566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz));
27959566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
27969566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
27974cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27984cb9d9b8SBarry Smith }
27994cb9d9b8SBarry Smith 
280095ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28014222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
28024222ddf1SHong Zhang {
28034222ddf1SHong Zhang   Mat_Product    *product = C->product;
28044222ddf1SHong Zhang 
28054222ddf1SHong Zhang   PetscFunctionBegin;
28064222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
28074222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
280898921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
28094222ddf1SHong Zhang   PetscFunctionReturn(0);
28104222ddf1SHong Zhang }
28114222ddf1SHong Zhang 
28124222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
28134222ddf1SHong Zhang {
28144222ddf1SHong Zhang   Mat_Product    *product = C->product;
28154222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
28164222ddf1SHong Zhang   Mat            A=product->A,P=product->B;
28174222ddf1SHong Zhang   PetscInt       alg=1; /* set default algorithm */
28184222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
28194222ddf1SHong Zhang   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
28204222ddf1SHong Zhang   PetscInt       nalg=4;
28214222ddf1SHong Zhang #else
28224222ddf1SHong Zhang   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
28234222ddf1SHong Zhang   PetscInt       nalg=5;
28244222ddf1SHong Zhang #endif
28254222ddf1SHong Zhang 
28264222ddf1SHong Zhang   PetscFunctionBegin;
282708401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
28284222ddf1SHong Zhang 
28294222ddf1SHong Zhang   /* PtAP */
28304222ddf1SHong Zhang   /* Check matrix local sizes */
2831aed4548fSBarry Smith   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2832aed4548fSBarry Smith   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
28334222ddf1SHong Zhang 
28344222ddf1SHong Zhang   /* Set the default algorithm */
28359566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
28364222ddf1SHong Zhang   if (flg) {
28379566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
28384222ddf1SHong Zhang   }
28394222ddf1SHong Zhang 
28404222ddf1SHong Zhang   /* Get runtime option */
2841d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
28429566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg));
28434222ddf1SHong Zhang   if (flg) {
28449566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
28454222ddf1SHong Zhang   }
2846d0609cedSBarry Smith   PetscOptionsEnd();
28474222ddf1SHong Zhang 
28489566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg));
28494222ddf1SHong Zhang   if (flg) {
28504222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28514222ddf1SHong Zhang     PetscFunctionReturn(0);
28524222ddf1SHong Zhang   }
28534222ddf1SHong Zhang 
28549566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg));
28554222ddf1SHong Zhang   if (flg) {
28564222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28574222ddf1SHong Zhang     PetscFunctionReturn(0);
28584222ddf1SHong Zhang   }
28594222ddf1SHong Zhang 
28604222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
28619566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
28629566063dSJacob Faibussowitsch   PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P));
28639566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
28644222ddf1SHong Zhang   PetscFunctionReturn(0);
28654222ddf1SHong Zhang }
28664222ddf1SHong Zhang 
28674222ddf1SHong Zhang /* ----------------------------------------------------------------*/
28684222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
28697ba1a0bfSKris Buschelman {
28700298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
28717ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
28727ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
28737ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2874d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
28757ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2876d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2877d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
28787ba1a0bfSKris Buschelman   MatScalar          *ca;
2879d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
28807ba1a0bfSKris Buschelman 
28817ba1a0bfSKris Buschelman   PetscFunctionBegin;
28827ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28839566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
28847ba1a0bfSKris Buschelman 
28857ba1a0bfSKris Buschelman   cn = pn*ppdof;
28867ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28877ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn+1,&ci));
28897ba1a0bfSKris Buschelman   ci[0] = 0;
28907ba1a0bfSKris Buschelman 
28917ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow));
28939566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow,an));
28949566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow,cn));
28957ba1a0bfSKris Buschelman 
28967ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28977ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28987ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28999566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space));
29007ba1a0bfSKris Buschelman   current_space = free_space;
29017ba1a0bfSKris Buschelman 
29027ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29037ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29047ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29057ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29067ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29077ba1a0bfSKris Buschelman       ptanzi = 0;
29087ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29097ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29107ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29117ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29127ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29137ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29147ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29157ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29167ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29177ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29187ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29197ba1a0bfSKris Buschelman           }
29207ba1a0bfSKris Buschelman         }
29217ba1a0bfSKris Buschelman       }
29227ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29237ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29247ba1a0bfSKris Buschelman       cnzi = 0;
29257ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29267ba1a0bfSKris Buschelman         /* Get offset within block of P */
29277ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29287ba1a0bfSKris Buschelman         /* Get block row of P */
29297ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29307ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29317ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29327ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29337ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29347ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29357ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29367ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29377ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29387ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
29397ba1a0bfSKris Buschelman           }
29407ba1a0bfSKris Buschelman         }
29417ba1a0bfSKris Buschelman       }
29427ba1a0bfSKris Buschelman 
29437ba1a0bfSKris Buschelman       /* sort sparserow */
29449566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi,sparserow));
29457ba1a0bfSKris Buschelman 
29467ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29477ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
29487ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
29499566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
29507ba1a0bfSKris Buschelman       }
29517ba1a0bfSKris Buschelman 
29527ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29539566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi));
295426fbe8dcSKarl Rupp 
29557ba1a0bfSKris Buschelman       current_space->array           += cnzi;
29567ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
29577ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29587ba1a0bfSKris Buschelman 
295926fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
296026fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
296126fbe8dcSKarl Rupp 
29627ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29637ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29647ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
29657ba1a0bfSKris Buschelman     }
29667ba1a0bfSKris Buschelman   }
29677ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29687ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29697ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn]+1,&cj));
29719566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
29729566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow));
29737ba1a0bfSKris Buschelman 
29747ba1a0bfSKris Buschelman   /* Allocate space for ca */
29759566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn]+1,&ca));
29767ba1a0bfSKris Buschelman 
29777ba1a0bfSKris Buschelman   /* put together the new matrix */
29789566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C));
29799566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C,pp->dof));
29807ba1a0bfSKris Buschelman 
29817ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29827ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29834222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
2984e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2985e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29867ba1a0bfSKris Buschelman   c->nonew   = 0;
298726fbe8dcSKarl Rupp 
29884222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29894222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29907ba1a0bfSKris Buschelman 
29917ba1a0bfSKris Buschelman   /* Clean up. */
29929566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
29937ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29947ba1a0bfSKris Buschelman }
29957ba1a0bfSKris Buschelman 
29967ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
29977ba1a0bfSKris Buschelman {
29987ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29997ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
30007ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
30017ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30027ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30037ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3004a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3005d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3006d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3007d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3008a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3009d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30107ba1a0bfSKris Buschelman 
30117ba1a0bfSKris Buschelman   PetscFunctionBegin;
30127ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30139566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense));
30147ba1a0bfSKris Buschelman 
30157ba1a0bfSKris Buschelman   /* Clear old values in C */
30169566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca,ci[cm]));
30177ba1a0bfSKris Buschelman 
30187ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30197ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30207ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30217ba1a0bfSKris Buschelman     apnzj = 0;
30227ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30237ba1a0bfSKris Buschelman       /* Get offset within block of P */
30247ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30257ba1a0bfSKris Buschelman       /* Get block row of P */
30267ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30277ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30287ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30297ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30307ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30317ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30327ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30337ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30347ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30357ba1a0bfSKris Buschelman         }
30367ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30377ba1a0bfSKris Buschelman       }
30389566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0*pnzj));
30397ba1a0bfSKris Buschelman       aa++;
30407ba1a0bfSKris Buschelman     }
30417ba1a0bfSKris Buschelman 
30427ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30437ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30449566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj,apj));
30457ba1a0bfSKris Buschelman 
30467ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30477ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
30487ba1a0bfSKris Buschelman     pshift  = i%ppdof;
30497ba1a0bfSKris Buschelman     poffset = pi[prow];
30507ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
30517ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30527ba1a0bfSKris Buschelman     pJ = pj+poffset;
30537ba1a0bfSKris Buschelman     pA = pa+poffset;
30547ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
30557ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
30567ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30577ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30587ba1a0bfSKris Buschelman       pJ++;
30597ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30607ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
306126fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
30627ba1a0bfSKris Buschelman       }
30639566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0*apnzj));
30647ba1a0bfSKris Buschelman       pA++;
30657ba1a0bfSKris Buschelman     }
30667ba1a0bfSKris Buschelman 
30677ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30687ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
30697ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30707ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30717ba1a0bfSKris Buschelman     }
30727ba1a0bfSKris Buschelman   }
30737ba1a0bfSKris Buschelman 
30747ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30759566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
30769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
30779566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa,apj,apjdense));
307895ddefa2SHong Zhang   PetscFunctionReturn(0);
307995ddefa2SHong Zhang }
30807ba1a0bfSKris Buschelman 
30814222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
30822121bac1SHong Zhang {
30834222ddf1SHong Zhang   Mat_Product         *product = C->product;
30844222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
30852121bac1SHong Zhang 
30862121bac1SHong Zhang   PetscFunctionBegin;
30879566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C));
30882121bac1SHong Zhang   PetscFunctionReturn(0);
30892121bac1SHong Zhang }
30902121bac1SHong Zhang 
3091f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
309295ddefa2SHong Zhang {
309395ddefa2SHong Zhang   PetscFunctionBegin;
30943e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
309595ddefa2SHong Zhang }
309695ddefa2SHong Zhang 
3097f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
309895ddefa2SHong Zhang {
309995ddefa2SHong Zhang   PetscFunctionBegin;
31003e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31017ba1a0bfSKris Buschelman }
31025c65b9ecSFande Kong 
3103bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3104bc8e477aSFande Kong 
3105bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3106bc8e477aSFande Kong {
3107bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3108bc8e477aSFande Kong 
3109bc8e477aSFande Kong   PetscFunctionBegin;
311034bcad68SFande Kong 
31119566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C));
3112bc8e477aSFande Kong   PetscFunctionReturn(0);
3113bc8e477aSFande Kong }
3114bc8e477aSFande Kong 
31154222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3116bc8e477aSFande Kong 
31174222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3118bc8e477aSFande Kong {
3119bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3120bc8e477aSFande Kong 
3121bc8e477aSFande Kong   PetscFunctionBegin;
31229566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C));
31234222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3124bc8e477aSFande Kong   PetscFunctionReturn(0);
3125bc8e477aSFande Kong }
3126bc8e477aSFande Kong 
3127bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3128bc8e477aSFande Kong 
3129bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3130bc8e477aSFande Kong {
3131bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3132bc8e477aSFande Kong 
3133bc8e477aSFande Kong   PetscFunctionBegin;
313434bcad68SFande Kong 
31359566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C));
3136bc8e477aSFande Kong   PetscFunctionReturn(0);
3137bc8e477aSFande Kong }
3138bc8e477aSFande Kong 
31394222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3140bc8e477aSFande Kong 
31414222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3142bc8e477aSFande Kong {
3143bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3144bc8e477aSFande Kong 
3145bc8e477aSFande Kong   PetscFunctionBegin;
314634bcad68SFande Kong 
31479566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C));
31484222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3149bc8e477aSFande Kong   PetscFunctionReturn(0);
3150bc8e477aSFande Kong }
3151bc8e477aSFande Kong 
31524222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3153bc8e477aSFande Kong {
31544222ddf1SHong Zhang   Mat_Product         *product = C->product;
31554222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
31564222ddf1SHong Zhang   PetscBool           flg;
3157bc8e477aSFande Kong 
3158bc8e477aSFande Kong   PetscFunctionBegin;
31599566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"allatonce",&flg));
31604222ddf1SHong Zhang   if (flg) {
31619566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C));
31624222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31634222ddf1SHong Zhang     PetscFunctionReturn(0);
3164bc8e477aSFande Kong   }
316534bcad68SFande Kong 
31669566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg));
31674222ddf1SHong Zhang   if (flg) {
31689566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C));
31694222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31704222ddf1SHong Zhang     PetscFunctionReturn(0);
31714222ddf1SHong Zhang   }
317234bcad68SFande Kong 
31734222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3174bc8e477aSFande Kong }
3175bc8e477aSFande Kong 
3176cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31779c4fc4c7SBarry Smith {
31789c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3179ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
31809c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
31810fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3182ba8c8a56SBarry Smith   PetscInt       *cols;
3183ba8c8a56SBarry Smith   PetscScalar    *vals;
31849c4fc4c7SBarry Smith 
31859c4fc4c7SBarry Smith   PetscFunctionBegin;
31869566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a,&m,&n));
31879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof*m,&ilen));
31889c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31899c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
319026fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
31919c4fc4c7SBarry Smith   }
31929566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
31939566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n));
31949566063dSJacob Faibussowitsch   PetscCall(MatSetType(B,newtype));
31959566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B,0,ilen));
31969566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax,&icols));
31989c4fc4c7SBarry Smith   ii   = 0;
31999c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32009566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals));
32010fd73130SBarry Smith     for (j=0; j<dof; j++) {
320226fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
32039566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
32049c4fc4c7SBarry Smith       ii++;
32059c4fc4c7SBarry Smith     }
32069566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals));
32079c4fc4c7SBarry Smith   }
32089566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
32099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
32109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3211ceb03754SKris Buschelman 
3212511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32139566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A,&B));
3214c3d102feSKris Buschelman   } else {
3215ceb03754SKris Buschelman     *newmat = B;
3216c3d102feSKris Buschelman   }
32179c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32189c4fc4c7SBarry Smith }
32199c4fc4c7SBarry Smith 
3220c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3221be1d678aSKris Buschelman 
3222cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32230fd73130SBarry Smith {
32240fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3225ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32260fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32270fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
32280fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
32290fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
32300298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
32310298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
32320fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
32330fd73130SBarry Smith   PetscScalar    *vals,*ovals;
32340fd73130SBarry Smith 
32350fd73130SBarry Smith   PetscFunctionBegin;
32369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz));
3237d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32380fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32390fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32400fd73130SBarry Smith     for (j=0; j<dof; j++) {
32410fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32420fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32430fd73130SBarry Smith     }
32440fd73130SBarry Smith   }
32459566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
32469566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
32479566063dSJacob Faibussowitsch   PetscCall(MatSetType(B,newtype));
32489566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz));
32499566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B,dof));
32509566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz,onz));
32510fd73130SBarry Smith 
32529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols));
3253d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3254d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
32550fd73130SBarry Smith   garray = mpiaij->garray;
32560fd73130SBarry Smith 
32570fd73130SBarry Smith   ii = rstart;
3258d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32599566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
32609566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
32610fd73130SBarry Smith     for (j=0; j<dof; j++) {
32620fd73130SBarry Smith       for (k=0; k<ncols; k++) {
32630fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
32640fd73130SBarry Smith       }
32650fd73130SBarry Smith       for (k=0; k<oncols; k++) {
32660fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
32670fd73130SBarry Smith       }
32689566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
32699566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES));
32700fd73130SBarry Smith       ii++;
32710fd73130SBarry Smith     }
32729566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
32739566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
32740fd73130SBarry Smith   }
32759566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols,oicols));
32760fd73130SBarry Smith 
32779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
32789566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3279ceb03754SKris Buschelman 
3280511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32817adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32827adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
328326fbe8dcSKarl Rupp 
32849566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A,&B));
328526fbe8dcSKarl Rupp 
32867adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3287c3d102feSKris Buschelman   } else {
3288ceb03754SKris Buschelman     *newmat = B;
3289c3d102feSKris Buschelman   }
32900fd73130SBarry Smith   PetscFunctionReturn(0);
32910fd73130SBarry Smith }
32920fd73130SBarry Smith 
32937dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
32949e516c8fSBarry Smith {
32959e516c8fSBarry Smith   Mat            A;
32969e516c8fSBarry Smith 
32979e516c8fSBarry Smith   PetscFunctionBegin;
32989566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
32999566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
33009566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
33019e516c8fSBarry Smith   PetscFunctionReturn(0);
33029e516c8fSBarry Smith }
33030fd73130SBarry Smith 
3304ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3305ec626240SStefano Zampini {
3306ec626240SStefano Zampini   Mat            A;
3307ec626240SStefano Zampini 
3308ec626240SStefano Zampini   PetscFunctionBegin;
33099566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
33109566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat));
33119566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3312ec626240SStefano Zampini   PetscFunctionReturn(0);
3313ec626240SStefano Zampini }
3314ec626240SStefano Zampini 
3315bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3316480dffcfSJed Brown /*@
33170bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33180bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33190bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33200bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33210bad9183SKris Buschelman 
3322ff585edeSJed Brown   Collective
3323ff585edeSJed Brown 
3324ff585edeSJed Brown   Input Parameters:
3325ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3326ff585edeSJed Brown - dof - the block size (number of components per node)
3327ff585edeSJed Brown 
3328ff585edeSJed Brown   Output Parameter:
3329ff585edeSJed Brown . maij - the new MAIJ matrix
3330ff585edeSJed Brown 
33310bad9183SKris Buschelman   Operations provided:
333267b8a455SSatish Balay .vb
333367b8a455SSatish Balay     MatMult
333467b8a455SSatish Balay     MatMultTranspose
333567b8a455SSatish Balay     MatMultAdd
333667b8a455SSatish Balay     MatMultTransposeAdd
333767b8a455SSatish Balay     MatView
333867b8a455SSatish Balay .ve
33390bad9183SKris Buschelman 
33400bad9183SKris Buschelman   Level: advanced
33410bad9183SKris Buschelman 
3342db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3343ff585edeSJed Brown @*/
33447087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
334582b1193eSBarry Smith {
3346b24ad042SBarry Smith   PetscMPIInt    size;
3347b24ad042SBarry Smith   PetscInt       n;
334882b1193eSBarry Smith   Mat            B;
3349fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3350fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3351fdc842d1SBarry Smith   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3352fdc842d1SBarry Smith #endif
335382b1193eSBarry Smith 
335482b1193eSBarry Smith   PetscFunctionBegin;
3355fdc842d1SBarry Smith   dof  = PetscAbs(dof);
33569566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3357d72c5c08SBarry Smith 
335826fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
335926fbe8dcSKarl Rupp   else {
33609566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3361c248e2fdSStefano Zampini     /* propagate vec type */
33629566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B,A->defaultvectype));
33639566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N));
33649566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap,dof));
33659566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap,dof));
33669566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
33679566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
336826fbe8dcSKarl Rupp 
3369cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3370d72c5c08SBarry Smith 
33719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3372f4a53059SBarry Smith     if (size == 1) {
3373feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3374feb9fe23SJed Brown 
33759566063dSJacob Faibussowitsch       PetscCall(MatSetType(B,MATSEQMAIJ));
337626fbe8dcSKarl Rupp 
33770298fd71SBarry Smith       B->ops->setup   = NULL;
33784cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33790fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
33804222ddf1SHong Zhang 
3381feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3382b9b97703SBarry Smith       b->dof          = dof;
33834cb9d9b8SBarry Smith       b->AIJ          = A;
338426fbe8dcSKarl Rupp 
3385d72c5c08SBarry Smith       if (dof == 2) {
3386cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3387cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3388cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3389cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3390bcc973b7SBarry Smith       } else if (dof == 3) {
3391bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3392bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3393bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3394bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3395bcc973b7SBarry Smith       } else if (dof == 4) {
3396bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3397bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3398bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3399bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3400f9fae5adSBarry Smith       } else if (dof == 5) {
3401f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3402f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3403f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3404f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34056cd98798SBarry Smith       } else if (dof == 6) {
34066cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34076cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34086cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34096cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3410ed8eea03SBarry Smith       } else if (dof == 7) {
3411ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3412ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3413ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3414ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
341566ed3db0SBarry Smith       } else if (dof == 8) {
341666ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
341766ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
341866ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
341966ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34202b692628SMatthew Knepley       } else if (dof == 9) {
34212b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34222b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34232b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34242b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3425b9cda22cSBarry Smith       } else if (dof == 10) {
3426b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3427b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3428b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3429b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3430dbdd7285SBarry Smith       } else if (dof == 11) {
3431dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3432dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3433dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3434dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34352f7816d4SBarry Smith       } else if (dof == 16) {
34362f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34372f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34382f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34392f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3440ed1c418cSBarry Smith       } else if (dof == 18) {
3441ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3442ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3443ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3444ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
344582b1193eSBarry Smith       } else {
34466861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34476861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34486861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34496861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
345082b1193eSBarry Smith       }
3451fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34529566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ));
3453fdc842d1SBarry Smith #endif
34549566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ));
34559566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3456f4a53059SBarry Smith     } else {
3457f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3458feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3459f4a53059SBarry Smith       IS          from,to;
3460f4a53059SBarry Smith       Vec         gvec;
3461f4a53059SBarry Smith 
34629566063dSJacob Faibussowitsch       PetscCall(MatSetType(B,MATMPIMAIJ));
346326fbe8dcSKarl Rupp 
34640298fd71SBarry Smith       B->ops->setup            = NULL;
3465d72c5c08SBarry Smith       B->ops->destroy          = MatDestroy_MPIMAIJ;
34660fd73130SBarry Smith       B->ops->view             = MatView_MPIMAIJ;
346726fbe8dcSKarl Rupp 
3468b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3469b9b97703SBarry Smith       b->dof = dof;
3470b9b97703SBarry Smith       b->A   = A;
347126fbe8dcSKarl Rupp 
34729566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ));
34739566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ));
34744cb9d9b8SBarry Smith 
34759566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec,&n));
34769566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF,&b->w));
34779566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w,n*dof,n*dof));
34789566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w,dof));
34799566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w,VECSEQ));
3480f4a53059SBarry Smith 
3481f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
34829566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
34839566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to));
3484f4a53059SBarry Smith 
3485f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
34869566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec));
3487f4a53059SBarry Smith 
3488f4a53059SBarry Smith       /* generate the scatter context */
34899566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx));
3490f4a53059SBarry Smith 
34919566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34929566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34939566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3494f4a53059SBarry Smith 
3495f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34964cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34974cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34984cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
349926fbe8dcSKarl Rupp 
3500fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
35019566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ));
3502fdc842d1SBarry Smith #endif
35039566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ));
35049566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3505f4a53059SBarry Smith     }
35067dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3507ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
35089566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3509fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3510fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3511fdc842d1SBarry Smith     {
3512fdc842d1SBarry Smith       PetscBool flg;
3513fdc842d1SBarry Smith       if (convert) {
35149566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,""));
3515fdc842d1SBarry Smith         if (flg) {
35169566063dSJacob Faibussowitsch           PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B));
3517fdc842d1SBarry Smith         }
3518fdc842d1SBarry Smith       }
3519fdc842d1SBarry Smith     }
3520fdc842d1SBarry Smith #endif
3521cd3bbe55SBarry Smith     *maij = B;
3522d72c5c08SBarry Smith   }
352382b1193eSBarry Smith   PetscFunctionReturn(0);
352482b1193eSBarry Smith }
3525