xref: /petsc/src/mat/impls/maij/maij.c (revision 08401ef684002a709c6d3db98a0c9f54a8bcf1ec)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38ff585edeSJed Brown .seealso: MatCreateMAIJ()
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
43b9b97703SBarry Smith 
44b9b97703SBarry Smith   PetscFunctionBegin;
459566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij));
469566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij));
47b9b97703SBarry Smith   if (ismpimaij) {
48b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49b9b97703SBarry Smith 
50b9b97703SBarry Smith     *B = b->A;
51b9b97703SBarry Smith   } else if (isseqmaij) {
52b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53b9b97703SBarry Smith 
54b9b97703SBarry Smith     *B = b->AIJ;
55b9b97703SBarry Smith   } else {
56b9b97703SBarry Smith     *B = A;
57b9b97703SBarry Smith   }
58b9b97703SBarry Smith   PetscFunctionReturn(0);
59b9b97703SBarry Smith }
60b9b97703SBarry Smith 
61480dffcfSJed Brown /*@
62ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63ff585edeSJed Brown 
643f9fe445SBarry Smith    Logically Collective
65ff585edeSJed Brown 
66d8d19677SJose E. Roman    Input Parameters:
67ff585edeSJed Brown +  A - the MAIJ matrix
68ff585edeSJed Brown -  dof - the block size for the new matrix
69ff585edeSJed Brown 
70ff585edeSJed Brown    Output Parameter:
71ff585edeSJed Brown .  B - the new MAIJ matrix
72ff585edeSJed Brown 
73ff585edeSJed Brown    Level: advanced
74ff585edeSJed Brown 
75ff585edeSJed Brown .seealso: MatCreateMAIJ()
76ff585edeSJed Brown @*/
777087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78b9b97703SBarry Smith {
790298fd71SBarry Smith   Mat            Aij = NULL;
80b9b97703SBarry Smith 
81b9b97703SBarry Smith   PetscFunctionBegin;
82c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
839566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A,&Aij));
849566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij,dof,B));
85b9b97703SBarry Smith   PetscFunctionReturn(0);
86b9b97703SBarry Smith }
87b9b97703SBarry Smith 
88dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
8982b1193eSBarry Smith {
904cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9182b1193eSBarry Smith 
9282b1193eSBarry Smith   PetscFunctionBegin;
939566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
949566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL));
969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL));
974cb9d9b8SBarry Smith   PetscFunctionReturn(0);
984cb9d9b8SBarry Smith }
994cb9d9b8SBarry Smith 
100356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
101356c569eSBarry Smith {
102356c569eSBarry Smith   PetscFunctionBegin;
103ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
104356c569eSBarry Smith }
105356c569eSBarry Smith 
1060fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1070fd73130SBarry Smith {
1080fd73130SBarry Smith   Mat            B;
1090fd73130SBarry Smith 
1100fd73130SBarry Smith   PetscFunctionBegin;
1119566063dSJacob Faibussowitsch   PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1129566063dSJacob Faibussowitsch   PetscCall(MatView(B,viewer));
1139566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1140fd73130SBarry Smith   PetscFunctionReturn(0);
1150fd73130SBarry Smith }
1160fd73130SBarry Smith 
1170fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1180fd73130SBarry Smith {
1190fd73130SBarry Smith   Mat            B;
1200fd73130SBarry Smith 
1210fd73130SBarry Smith   PetscFunctionBegin;
1229566063dSJacob Faibussowitsch   PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
1239566063dSJacob Faibussowitsch   PetscCall(MatView(B,viewer));
1249566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1250fd73130SBarry Smith   PetscFunctionReturn(0);
1260fd73130SBarry Smith }
1270fd73130SBarry Smith 
128dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1294cb9d9b8SBarry Smith {
1304cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1314cb9d9b8SBarry Smith 
1324cb9d9b8SBarry Smith   PetscFunctionBegin;
1339566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1349566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1369566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1379566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1389566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL));
1409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL));
1419566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
142b9b97703SBarry Smith   PetscFunctionReturn(0);
143b9b97703SBarry Smith }
144b9b97703SBarry Smith 
1450bad9183SKris Buschelman /*MC
146fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1470bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1480bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1490bad9183SKris Buschelman 
1500bad9183SKris Buschelman   Operations provided:
15167b8a455SSatish Balay .vb
15267b8a455SSatish Balay     MatMult
15367b8a455SSatish Balay     MatMultTranspose
15467b8a455SSatish Balay     MatMultAdd
15567b8a455SSatish Balay     MatMultTransposeAdd
15667b8a455SSatish Balay .ve
1570bad9183SKris Buschelman 
1580bad9183SKris Buschelman   Level: advanced
1590bad9183SKris Buschelman 
160b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1610bad9183SKris Buschelman M*/
1620bad9183SKris Buschelman 
1638cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
16482b1193eSBarry Smith {
1654cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
16664b52464SHong Zhang   PetscMPIInt    size;
16782b1193eSBarry Smith 
16882b1193eSBarry Smith   PetscFunctionBegin;
1699566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A,&b));
170b0a32e0cSBarry Smith   A->data  = (void*)b;
17126fbe8dcSKarl Rupp 
1729566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));
17326fbe8dcSKarl Rupp 
174356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
175f4a53059SBarry Smith 
176f4259b30SLisandro Dalcin   b->AIJ  = NULL;
177cd3bbe55SBarry Smith   b->dof  = 0;
178f4259b30SLisandro Dalcin   b->OAIJ = NULL;
179f4259b30SLisandro Dalcin   b->ctx  = NULL;
180f4259b30SLisandro Dalcin   b->w    = NULL;
1819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
18264b52464SHong Zhang   if (size == 1) {
1839566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ));
18464b52464SHong Zhang   } else {
1859566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ));
18664b52464SHong Zhang   }
18732e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
18832e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
18982b1193eSBarry Smith   PetscFunctionReturn(0);
19082b1193eSBarry Smith }
19182b1193eSBarry Smith 
192cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
193dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
19482b1193eSBarry Smith {
1954cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
196bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
197d2840e09SBarry Smith   const PetscScalar *x,*v;
198d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
199d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
200d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
20182b1193eSBarry Smith 
202bcc973b7SBarry Smith   PetscFunctionBegin;
2039566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2049566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
205bcc973b7SBarry Smith   idx  = a->j;
206bcc973b7SBarry Smith   v    = a->a;
207bcc973b7SBarry Smith   ii   = a->i;
208bcc973b7SBarry Smith 
209bcc973b7SBarry Smith   for (i=0; i<m; i++) {
210bcc973b7SBarry Smith     jrow  = ii[i];
211bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
212bcc973b7SBarry Smith     sum1  = 0.0;
213bcc973b7SBarry Smith     sum2  = 0.0;
21426fbe8dcSKarl Rupp 
215b3c51c66SMatthew Knepley     nonzerorow += (n>0);
216bcc973b7SBarry Smith     for (j=0; j<n; j++) {
217bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
218bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
219bcc973b7SBarry Smith       jrow++;
220bcc973b7SBarry Smith     }
221bcc973b7SBarry Smith     y[2*i]   = sum1;
222bcc973b7SBarry Smith     y[2*i+1] = sum2;
223bcc973b7SBarry Smith   }
224bcc973b7SBarry Smith 
2259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow));
2269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
22882b1193eSBarry Smith   PetscFunctionReturn(0);
22982b1193eSBarry Smith }
230bcc973b7SBarry Smith 
231dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
23282b1193eSBarry Smith {
2334cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
234bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
235d2840e09SBarry Smith   const PetscScalar *x,*v;
236d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
237d2840e09SBarry Smith   PetscInt          n,i;
238d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
23982b1193eSBarry Smith 
240bcc973b7SBarry Smith   PetscFunctionBegin;
2419566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
2429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
244bfec09a0SHong Zhang 
245bcc973b7SBarry Smith   for (i=0; i<m; i++) {
246bfec09a0SHong Zhang     idx    = a->j + a->i[i];
247bfec09a0SHong Zhang     v      = a->a + a->i[i];
248bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
249bcc973b7SBarry Smith     alpha1 = x[2*i];
250bcc973b7SBarry Smith     alpha2 = x[2*i+1];
25126fbe8dcSKarl Rupp     while (n-->0) {
25226fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
25326fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
25426fbe8dcSKarl Rupp       idx++; v++;
25526fbe8dcSKarl Rupp     }
256bcc973b7SBarry Smith   }
2579566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
2589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
26082b1193eSBarry Smith   PetscFunctionReturn(0);
26182b1193eSBarry Smith }
262bcc973b7SBarry Smith 
263dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
26482b1193eSBarry Smith {
2654cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
266bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
267d2840e09SBarry Smith   const PetscScalar *x,*v;
268d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
269b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
270d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27182b1193eSBarry Smith 
272bcc973b7SBarry Smith   PetscFunctionBegin;
2739566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
2749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
2759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
276bcc973b7SBarry Smith   idx  = a->j;
277bcc973b7SBarry Smith   v    = a->a;
278bcc973b7SBarry Smith   ii   = a->i;
279bcc973b7SBarry Smith 
280bcc973b7SBarry Smith   for (i=0; i<m; i++) {
281bcc973b7SBarry Smith     jrow = ii[i];
282bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
283bcc973b7SBarry Smith     sum1 = 0.0;
284bcc973b7SBarry Smith     sum2 = 0.0;
285bcc973b7SBarry Smith     for (j=0; j<n; j++) {
286bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
287bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
288bcc973b7SBarry Smith       jrow++;
289bcc973b7SBarry Smith     }
290bcc973b7SBarry Smith     y[2*i]   += sum1;
291bcc973b7SBarry Smith     y[2*i+1] += sum2;
292bcc973b7SBarry Smith   }
293bcc973b7SBarry Smith 
2949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
2959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
2969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
297bcc973b7SBarry Smith   PetscFunctionReturn(0);
29882b1193eSBarry Smith }
299dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
30082b1193eSBarry Smith {
3014cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
302bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
303d2840e09SBarry Smith   const PetscScalar *x,*v;
304d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
305d2840e09SBarry Smith   PetscInt          n,i;
306d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
30782b1193eSBarry Smith 
308bcc973b7SBarry Smith   PetscFunctionBegin;
3099566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
3109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
312bfec09a0SHong Zhang 
313bcc973b7SBarry Smith   for (i=0; i<m; i++) {
314bfec09a0SHong Zhang     idx    = a->j + a->i[i];
315bfec09a0SHong Zhang     v      = a->a + a->i[i];
316bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
317bcc973b7SBarry Smith     alpha1 = x[2*i];
318bcc973b7SBarry Smith     alpha2 = x[2*i+1];
31926fbe8dcSKarl Rupp     while (n-->0) {
32026fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
32126fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
32226fbe8dcSKarl Rupp       idx++; v++;
32326fbe8dcSKarl Rupp     }
324bcc973b7SBarry Smith   }
3259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0*a->nz));
3269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
328bcc973b7SBarry Smith   PetscFunctionReturn(0);
32982b1193eSBarry Smith }
330cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
331dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
332bcc973b7SBarry Smith {
3334cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
334bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
335d2840e09SBarry Smith   const PetscScalar *x,*v;
336d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
337d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
338d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
33982b1193eSBarry Smith 
340bcc973b7SBarry Smith   PetscFunctionBegin;
3419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
343bcc973b7SBarry Smith   idx  = a->j;
344bcc973b7SBarry Smith   v    = a->a;
345bcc973b7SBarry Smith   ii   = a->i;
346bcc973b7SBarry Smith 
347bcc973b7SBarry Smith   for (i=0; i<m; i++) {
348bcc973b7SBarry Smith     jrow  = ii[i];
349bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
350bcc973b7SBarry Smith     sum1  = 0.0;
351bcc973b7SBarry Smith     sum2  = 0.0;
352bcc973b7SBarry Smith     sum3  = 0.0;
35326fbe8dcSKarl Rupp 
354b3c51c66SMatthew Knepley     nonzerorow += (n>0);
355bcc973b7SBarry Smith     for (j=0; j<n; j++) {
356bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
357bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
358bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
359bcc973b7SBarry Smith       jrow++;
360bcc973b7SBarry Smith      }
361bcc973b7SBarry Smith     y[3*i]   = sum1;
362bcc973b7SBarry Smith     y[3*i+1] = sum2;
363bcc973b7SBarry Smith     y[3*i+2] = sum3;
364bcc973b7SBarry Smith   }
365bcc973b7SBarry Smith 
3669566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow));
3679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
3689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
369bcc973b7SBarry Smith   PetscFunctionReturn(0);
370bcc973b7SBarry Smith }
371bcc973b7SBarry Smith 
372dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
373bcc973b7SBarry Smith {
3744cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
375bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
376d2840e09SBarry Smith   const PetscScalar *x,*v;
377d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
378d2840e09SBarry Smith   PetscInt          n,i;
379d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
380bcc973b7SBarry Smith 
381bcc973b7SBarry Smith   PetscFunctionBegin;
3829566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
3839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
3849566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
385bfec09a0SHong Zhang 
386bcc973b7SBarry Smith   for (i=0; i<m; i++) {
387bfec09a0SHong Zhang     idx    = a->j + a->i[i];
388bfec09a0SHong Zhang     v      = a->a + a->i[i];
389bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
390bcc973b7SBarry Smith     alpha1 = x[3*i];
391bcc973b7SBarry Smith     alpha2 = x[3*i+1];
392bcc973b7SBarry Smith     alpha3 = x[3*i+2];
393bcc973b7SBarry Smith     while (n-->0) {
394bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
395bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
396bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
397bcc973b7SBarry Smith       idx++; v++;
398bcc973b7SBarry Smith     }
399bcc973b7SBarry Smith   }
4009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
403bcc973b7SBarry Smith   PetscFunctionReturn(0);
404bcc973b7SBarry Smith }
405bcc973b7SBarry Smith 
406dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
407bcc973b7SBarry Smith {
4084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
409bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
410d2840e09SBarry Smith   const PetscScalar *x,*v;
411d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
412b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
413d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
414bcc973b7SBarry Smith 
415bcc973b7SBarry Smith   PetscFunctionBegin;
4169566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
4179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
419bcc973b7SBarry Smith   idx  = a->j;
420bcc973b7SBarry Smith   v    = a->a;
421bcc973b7SBarry Smith   ii   = a->i;
422bcc973b7SBarry Smith 
423bcc973b7SBarry Smith   for (i=0; i<m; i++) {
424bcc973b7SBarry Smith     jrow = ii[i];
425bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
426bcc973b7SBarry Smith     sum1 = 0.0;
427bcc973b7SBarry Smith     sum2 = 0.0;
428bcc973b7SBarry Smith     sum3 = 0.0;
429bcc973b7SBarry Smith     for (j=0; j<n; j++) {
430bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
431bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
432bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
433bcc973b7SBarry Smith       jrow++;
434bcc973b7SBarry Smith     }
435bcc973b7SBarry Smith     y[3*i]   += sum1;
436bcc973b7SBarry Smith     y[3*i+1] += sum2;
437bcc973b7SBarry Smith     y[3*i+2] += sum3;
438bcc973b7SBarry Smith   }
439bcc973b7SBarry Smith 
4409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
443bcc973b7SBarry Smith   PetscFunctionReturn(0);
444bcc973b7SBarry Smith }
445dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
446bcc973b7SBarry Smith {
4474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
448bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
449d2840e09SBarry Smith   const PetscScalar *x,*v;
450d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
451d2840e09SBarry Smith   PetscInt          n,i;
452d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
453bcc973b7SBarry Smith 
454bcc973b7SBarry Smith   PetscFunctionBegin;
4559566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
4569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
458bcc973b7SBarry Smith   for (i=0; i<m; i++) {
459bfec09a0SHong Zhang     idx    = a->j + a->i[i];
460bfec09a0SHong Zhang     v      = a->a + a->i[i];
461bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
462bcc973b7SBarry Smith     alpha1 = x[3*i];
463bcc973b7SBarry Smith     alpha2 = x[3*i+1];
464bcc973b7SBarry Smith     alpha3 = x[3*i+2];
465bcc973b7SBarry Smith     while (n-->0) {
466bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
467bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
468bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
469bcc973b7SBarry Smith       idx++; v++;
470bcc973b7SBarry Smith     }
471bcc973b7SBarry Smith   }
4729566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0*a->nz));
4739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
4749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
475bcc973b7SBarry Smith   PetscFunctionReturn(0);
476bcc973b7SBarry Smith }
477bcc973b7SBarry Smith 
478bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
479dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
480bcc973b7SBarry Smith {
4814cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
482bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
483d2840e09SBarry Smith   const PetscScalar *x,*v;
484d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
485d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
486d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
487bcc973b7SBarry Smith 
488bcc973b7SBarry Smith   PetscFunctionBegin;
4899566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
4909566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
491bcc973b7SBarry Smith   idx  = a->j;
492bcc973b7SBarry Smith   v    = a->a;
493bcc973b7SBarry Smith   ii   = a->i;
494bcc973b7SBarry Smith 
495bcc973b7SBarry Smith   for (i=0; i<m; i++) {
496bcc973b7SBarry Smith     jrow        = ii[i];
497bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
498bcc973b7SBarry Smith     sum1        = 0.0;
499bcc973b7SBarry Smith     sum2        = 0.0;
500bcc973b7SBarry Smith     sum3        = 0.0;
501bcc973b7SBarry Smith     sum4        = 0.0;
502b3c51c66SMatthew Knepley     nonzerorow += (n>0);
503bcc973b7SBarry Smith     for (j=0; j<n; j++) {
504bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
505bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
506bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
507bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
508bcc973b7SBarry Smith       jrow++;
509bcc973b7SBarry Smith     }
510bcc973b7SBarry Smith     y[4*i]   = sum1;
511bcc973b7SBarry Smith     y[4*i+1] = sum2;
512bcc973b7SBarry Smith     y[4*i+2] = sum3;
513bcc973b7SBarry Smith     y[4*i+3] = sum4;
514bcc973b7SBarry Smith   }
515bcc973b7SBarry Smith 
5169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow));
5179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
519bcc973b7SBarry Smith   PetscFunctionReturn(0);
520bcc973b7SBarry Smith }
521bcc973b7SBarry Smith 
522dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
523bcc973b7SBarry Smith {
5244cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
525bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
526d2840e09SBarry Smith   const PetscScalar *x,*v;
527d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
528d2840e09SBarry Smith   PetscInt          n,i;
529d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
530bcc973b7SBarry Smith 
531bcc973b7SBarry Smith   PetscFunctionBegin;
5329566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
5339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5349566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
535bcc973b7SBarry Smith   for (i=0; i<m; i++) {
536bfec09a0SHong Zhang     idx    = a->j + a->i[i];
537bfec09a0SHong Zhang     v      = a->a + a->i[i];
538bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
539bcc973b7SBarry Smith     alpha1 = x[4*i];
540bcc973b7SBarry Smith     alpha2 = x[4*i+1];
541bcc973b7SBarry Smith     alpha3 = x[4*i+2];
542bcc973b7SBarry Smith     alpha4 = x[4*i+3];
543bcc973b7SBarry Smith     while (n-->0) {
544bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
545bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
546bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
547bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
548bcc973b7SBarry Smith       idx++; v++;
549bcc973b7SBarry Smith     }
550bcc973b7SBarry Smith   }
5519566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
5529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
554bcc973b7SBarry Smith   PetscFunctionReturn(0);
555bcc973b7SBarry Smith }
556bcc973b7SBarry Smith 
557dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
558bcc973b7SBarry Smith {
5594cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561d2840e09SBarry Smith   const PetscScalar *x,*v;
562d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
563b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
564d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
565f9fae5adSBarry Smith 
566f9fae5adSBarry Smith   PetscFunctionBegin;
5679566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
5689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
5699566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
570f9fae5adSBarry Smith   idx  = a->j;
571f9fae5adSBarry Smith   v    = a->a;
572f9fae5adSBarry Smith   ii   = a->i;
573f9fae5adSBarry Smith 
574f9fae5adSBarry Smith   for (i=0; i<m; i++) {
575f9fae5adSBarry Smith     jrow = ii[i];
576f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
577f9fae5adSBarry Smith     sum1 = 0.0;
578f9fae5adSBarry Smith     sum2 = 0.0;
579f9fae5adSBarry Smith     sum3 = 0.0;
580f9fae5adSBarry Smith     sum4 = 0.0;
581f9fae5adSBarry Smith     for (j=0; j<n; j++) {
582f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
583f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
584f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
585f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
586f9fae5adSBarry Smith       jrow++;
587f9fae5adSBarry Smith     }
588f9fae5adSBarry Smith     y[4*i]   += sum1;
589f9fae5adSBarry Smith     y[4*i+1] += sum2;
590f9fae5adSBarry Smith     y[4*i+2] += sum3;
591f9fae5adSBarry Smith     y[4*i+3] += sum4;
592f9fae5adSBarry Smith   }
593f9fae5adSBarry Smith 
5949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
5959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
5969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
597f9fae5adSBarry Smith   PetscFunctionReturn(0);
598bcc973b7SBarry Smith }
599dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
600bcc973b7SBarry Smith {
6014cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
602f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
603d2840e09SBarry Smith   const PetscScalar *x,*v;
604d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
605d2840e09SBarry Smith   PetscInt          n,i;
606d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
607f9fae5adSBarry Smith 
608f9fae5adSBarry Smith   PetscFunctionBegin;
6099566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
6109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
612bfec09a0SHong Zhang 
613f9fae5adSBarry Smith   for (i=0; i<m; i++) {
614bfec09a0SHong Zhang     idx    = a->j + a->i[i];
615bfec09a0SHong Zhang     v      = a->a + a->i[i];
616f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
617f9fae5adSBarry Smith     alpha1 = x[4*i];
618f9fae5adSBarry Smith     alpha2 = x[4*i+1];
619f9fae5adSBarry Smith     alpha3 = x[4*i+2];
620f9fae5adSBarry Smith     alpha4 = x[4*i+3];
621f9fae5adSBarry Smith     while (n-->0) {
622f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
623f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
624f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
625f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
626f9fae5adSBarry Smith       idx++; v++;
627f9fae5adSBarry Smith     }
628f9fae5adSBarry Smith   }
6299566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0*a->nz));
6309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
632f9fae5adSBarry Smith   PetscFunctionReturn(0);
633f9fae5adSBarry Smith }
634f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6356cd98798SBarry Smith 
636dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
637f9fae5adSBarry Smith {
6384cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
639f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
640d2840e09SBarry Smith   const PetscScalar *x,*v;
641d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
642d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
643d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
644f9fae5adSBarry Smith 
645f9fae5adSBarry Smith   PetscFunctionBegin;
6469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
648f9fae5adSBarry Smith   idx  = a->j;
649f9fae5adSBarry Smith   v    = a->a;
650f9fae5adSBarry Smith   ii   = a->i;
651f9fae5adSBarry Smith 
652f9fae5adSBarry Smith   for (i=0; i<m; i++) {
653f9fae5adSBarry Smith     jrow  = ii[i];
654f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
655f9fae5adSBarry Smith     sum1  = 0.0;
656f9fae5adSBarry Smith     sum2  = 0.0;
657f9fae5adSBarry Smith     sum3  = 0.0;
658f9fae5adSBarry Smith     sum4  = 0.0;
659f9fae5adSBarry Smith     sum5  = 0.0;
66026fbe8dcSKarl Rupp 
661b3c51c66SMatthew Knepley     nonzerorow += (n>0);
662f9fae5adSBarry Smith     for (j=0; j<n; j++) {
663f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
664f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
665f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
666f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
667f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
668f9fae5adSBarry Smith       jrow++;
669f9fae5adSBarry Smith     }
670f9fae5adSBarry Smith     y[5*i]   = sum1;
671f9fae5adSBarry Smith     y[5*i+1] = sum2;
672f9fae5adSBarry Smith     y[5*i+2] = sum3;
673f9fae5adSBarry Smith     y[5*i+3] = sum4;
674f9fae5adSBarry Smith     y[5*i+4] = sum5;
675f9fae5adSBarry Smith   }
676f9fae5adSBarry Smith 
6779566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow));
6789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
6799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
680f9fae5adSBarry Smith   PetscFunctionReturn(0);
681f9fae5adSBarry Smith }
682f9fae5adSBarry Smith 
683dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684f9fae5adSBarry Smith {
6854cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
686f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
687d2840e09SBarry Smith   const PetscScalar *x,*v;
688d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
689d2840e09SBarry Smith   PetscInt          n,i;
690d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
691f9fae5adSBarry Smith 
692f9fae5adSBarry Smith   PetscFunctionBegin;
6939566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
6949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
6959566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
696bfec09a0SHong Zhang 
697f9fae5adSBarry Smith   for (i=0; i<m; i++) {
698bfec09a0SHong Zhang     idx    = a->j + a->i[i];
699bfec09a0SHong Zhang     v      = a->a + a->i[i];
700f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
701f9fae5adSBarry Smith     alpha1 = x[5*i];
702f9fae5adSBarry Smith     alpha2 = x[5*i+1];
703f9fae5adSBarry Smith     alpha3 = x[5*i+2];
704f9fae5adSBarry Smith     alpha4 = x[5*i+3];
705f9fae5adSBarry Smith     alpha5 = x[5*i+4];
706f9fae5adSBarry Smith     while (n-->0) {
707f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
708f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
709f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
710f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
711f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
712f9fae5adSBarry Smith       idx++; v++;
713f9fae5adSBarry Smith     }
714f9fae5adSBarry Smith   }
7159566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
7169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
7179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
718f9fae5adSBarry Smith   PetscFunctionReturn(0);
719f9fae5adSBarry Smith }
720f9fae5adSBarry Smith 
721dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
722f9fae5adSBarry Smith {
7234cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
724f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
725d2840e09SBarry Smith   const PetscScalar *x,*v;
726d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
727b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
728d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
729f9fae5adSBarry Smith 
730f9fae5adSBarry Smith   PetscFunctionBegin;
7319566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
7329566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
7339566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
734f9fae5adSBarry Smith   idx  = a->j;
735f9fae5adSBarry Smith   v    = a->a;
736f9fae5adSBarry Smith   ii   = a->i;
737f9fae5adSBarry Smith 
738f9fae5adSBarry Smith   for (i=0; i<m; i++) {
739f9fae5adSBarry Smith     jrow = ii[i];
740f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
741f9fae5adSBarry Smith     sum1 = 0.0;
742f9fae5adSBarry Smith     sum2 = 0.0;
743f9fae5adSBarry Smith     sum3 = 0.0;
744f9fae5adSBarry Smith     sum4 = 0.0;
745f9fae5adSBarry Smith     sum5 = 0.0;
746f9fae5adSBarry Smith     for (j=0; j<n; j++) {
747f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
748f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
749f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
750f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
751f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
752f9fae5adSBarry Smith       jrow++;
753f9fae5adSBarry Smith     }
754f9fae5adSBarry Smith     y[5*i]   += sum1;
755f9fae5adSBarry Smith     y[5*i+1] += sum2;
756f9fae5adSBarry Smith     y[5*i+2] += sum3;
757f9fae5adSBarry Smith     y[5*i+3] += sum4;
758f9fae5adSBarry Smith     y[5*i+4] += sum5;
759f9fae5adSBarry Smith   }
760f9fae5adSBarry Smith 
7619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
7629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
7639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
764f9fae5adSBarry Smith   PetscFunctionReturn(0);
765f9fae5adSBarry Smith }
766f9fae5adSBarry Smith 
767dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768f9fae5adSBarry Smith {
7694cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
770f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
771d2840e09SBarry Smith   const PetscScalar *x,*v;
772d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
773d2840e09SBarry Smith   PetscInt          n,i;
774d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
775f9fae5adSBarry Smith 
776f9fae5adSBarry Smith   PetscFunctionBegin;
7779566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
7789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
7799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
780bfec09a0SHong Zhang 
781f9fae5adSBarry Smith   for (i=0; i<m; i++) {
782bfec09a0SHong Zhang     idx    = a->j + a->i[i];
783bfec09a0SHong Zhang     v      = a->a + a->i[i];
784f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
785f9fae5adSBarry Smith     alpha1 = x[5*i];
786f9fae5adSBarry Smith     alpha2 = x[5*i+1];
787f9fae5adSBarry Smith     alpha3 = x[5*i+2];
788f9fae5adSBarry Smith     alpha4 = x[5*i+3];
789f9fae5adSBarry Smith     alpha5 = x[5*i+4];
790f9fae5adSBarry Smith     while (n-->0) {
791f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
792f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
793f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
794f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
795f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
796f9fae5adSBarry Smith       idx++; v++;
797f9fae5adSBarry Smith     }
798f9fae5adSBarry Smith   }
7999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0*a->nz));
8009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
802f9fae5adSBarry Smith   PetscFunctionReturn(0);
803bcc973b7SBarry Smith }
804bcc973b7SBarry Smith 
8056cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
806dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8076cd98798SBarry Smith {
8086cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8096cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
810d2840e09SBarry Smith   const PetscScalar *x,*v;
811d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
812d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
813d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8146cd98798SBarry Smith 
8156cd98798SBarry Smith   PetscFunctionBegin;
8169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
8179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
8186cd98798SBarry Smith   idx  = a->j;
8196cd98798SBarry Smith   v    = a->a;
8206cd98798SBarry Smith   ii   = a->i;
8216cd98798SBarry Smith 
8226cd98798SBarry Smith   for (i=0; i<m; i++) {
8236cd98798SBarry Smith     jrow  = ii[i];
8246cd98798SBarry Smith     n     = ii[i+1] - jrow;
8256cd98798SBarry Smith     sum1  = 0.0;
8266cd98798SBarry Smith     sum2  = 0.0;
8276cd98798SBarry Smith     sum3  = 0.0;
8286cd98798SBarry Smith     sum4  = 0.0;
8296cd98798SBarry Smith     sum5  = 0.0;
8306cd98798SBarry Smith     sum6  = 0.0;
83126fbe8dcSKarl Rupp 
832b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8336cd98798SBarry Smith     for (j=0; j<n; j++) {
8346cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8356cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8366cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8376cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8386cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8396cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8406cd98798SBarry Smith       jrow++;
8416cd98798SBarry Smith     }
8426cd98798SBarry Smith     y[6*i]   = sum1;
8436cd98798SBarry Smith     y[6*i+1] = sum2;
8446cd98798SBarry Smith     y[6*i+2] = sum3;
8456cd98798SBarry Smith     y[6*i+3] = sum4;
8466cd98798SBarry Smith     y[6*i+4] = sum5;
8476cd98798SBarry Smith     y[6*i+5] = sum6;
8486cd98798SBarry Smith   }
8496cd98798SBarry Smith 
8509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow));
8519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
8536cd98798SBarry Smith   PetscFunctionReturn(0);
8546cd98798SBarry Smith }
8556cd98798SBarry Smith 
856dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8576cd98798SBarry Smith {
8586cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8596cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
860d2840e09SBarry Smith   const PetscScalar *x,*v;
861d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
862d2840e09SBarry Smith   PetscInt          n,i;
863d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8646cd98798SBarry Smith 
8656cd98798SBarry Smith   PetscFunctionBegin;
8669566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
8679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
8689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
869bfec09a0SHong Zhang 
8706cd98798SBarry Smith   for (i=0; i<m; i++) {
871bfec09a0SHong Zhang     idx    = a->j + a->i[i];
872bfec09a0SHong Zhang     v      = a->a + a->i[i];
8736cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8746cd98798SBarry Smith     alpha1 = x[6*i];
8756cd98798SBarry Smith     alpha2 = x[6*i+1];
8766cd98798SBarry Smith     alpha3 = x[6*i+2];
8776cd98798SBarry Smith     alpha4 = x[6*i+3];
8786cd98798SBarry Smith     alpha5 = x[6*i+4];
8796cd98798SBarry Smith     alpha6 = x[6*i+5];
8806cd98798SBarry Smith     while (n-->0) {
8816cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8826cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8836cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8846cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8856cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8866cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8876cd98798SBarry Smith       idx++; v++;
8886cd98798SBarry Smith     }
8896cd98798SBarry Smith   }
8909566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
8919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
8929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
8936cd98798SBarry Smith   PetscFunctionReturn(0);
8946cd98798SBarry Smith }
8956cd98798SBarry Smith 
896dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8976cd98798SBarry Smith {
8986cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8996cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
900d2840e09SBarry Smith   const PetscScalar *x,*v;
901d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
902b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
903d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9046cd98798SBarry Smith 
9056cd98798SBarry Smith   PetscFunctionBegin;
9069566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
9079566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9089566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
9096cd98798SBarry Smith   idx  = a->j;
9106cd98798SBarry Smith   v    = a->a;
9116cd98798SBarry Smith   ii   = a->i;
9126cd98798SBarry Smith 
9136cd98798SBarry Smith   for (i=0; i<m; i++) {
9146cd98798SBarry Smith     jrow = ii[i];
9156cd98798SBarry Smith     n    = ii[i+1] - jrow;
9166cd98798SBarry Smith     sum1 = 0.0;
9176cd98798SBarry Smith     sum2 = 0.0;
9186cd98798SBarry Smith     sum3 = 0.0;
9196cd98798SBarry Smith     sum4 = 0.0;
9206cd98798SBarry Smith     sum5 = 0.0;
9216cd98798SBarry Smith     sum6 = 0.0;
9226cd98798SBarry Smith     for (j=0; j<n; j++) {
9236cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9246cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9256cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9266cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9276cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9286cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9296cd98798SBarry Smith       jrow++;
9306cd98798SBarry Smith     }
9316cd98798SBarry Smith     y[6*i]   += sum1;
9326cd98798SBarry Smith     y[6*i+1] += sum2;
9336cd98798SBarry Smith     y[6*i+2] += sum3;
9346cd98798SBarry Smith     y[6*i+3] += sum4;
9356cd98798SBarry Smith     y[6*i+4] += sum5;
9366cd98798SBarry Smith     y[6*i+5] += sum6;
9376cd98798SBarry Smith   }
9386cd98798SBarry Smith 
9399566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
9409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
9419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
9426cd98798SBarry Smith   PetscFunctionReturn(0);
9436cd98798SBarry Smith }
9446cd98798SBarry Smith 
945dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9466cd98798SBarry Smith {
9476cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9486cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
949d2840e09SBarry Smith   const PetscScalar *x,*v;
950d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951d2840e09SBarry Smith   PetscInt          n,i;
952d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9536cd98798SBarry Smith 
9546cd98798SBarry Smith   PetscFunctionBegin;
9559566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
9569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
958bfec09a0SHong Zhang 
9596cd98798SBarry Smith   for (i=0; i<m; i++) {
960bfec09a0SHong Zhang     idx    = a->j + a->i[i];
961bfec09a0SHong Zhang     v      = a->a + a->i[i];
9626cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9636cd98798SBarry Smith     alpha1 = x[6*i];
9646cd98798SBarry Smith     alpha2 = x[6*i+1];
9656cd98798SBarry Smith     alpha3 = x[6*i+2];
9666cd98798SBarry Smith     alpha4 = x[6*i+3];
9676cd98798SBarry Smith     alpha5 = x[6*i+4];
9686cd98798SBarry Smith     alpha6 = x[6*i+5];
9696cd98798SBarry Smith     while (n-->0) {
9706cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9716cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9726cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9736cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9746cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9756cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9766cd98798SBarry Smith       idx++; v++;
9776cd98798SBarry Smith     }
9786cd98798SBarry Smith   }
9799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0*a->nz));
9809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
9819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
9826cd98798SBarry Smith   PetscFunctionReturn(0);
9836cd98798SBarry Smith }
9846cd98798SBarry Smith 
98566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
986ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
987ed8eea03SBarry Smith {
988ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
989ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
990d2840e09SBarry Smith   const PetscScalar *x,*v;
991d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
992d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
993d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
994ed8eea03SBarry Smith 
995ed8eea03SBarry Smith   PetscFunctionBegin;
9969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
9979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
998ed8eea03SBarry Smith   idx  = a->j;
999ed8eea03SBarry Smith   v    = a->a;
1000ed8eea03SBarry Smith   ii   = a->i;
1001ed8eea03SBarry Smith 
1002ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1003ed8eea03SBarry Smith     jrow  = ii[i];
1004ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1005ed8eea03SBarry Smith     sum1  = 0.0;
1006ed8eea03SBarry Smith     sum2  = 0.0;
1007ed8eea03SBarry Smith     sum3  = 0.0;
1008ed8eea03SBarry Smith     sum4  = 0.0;
1009ed8eea03SBarry Smith     sum5  = 0.0;
1010ed8eea03SBarry Smith     sum6  = 0.0;
1011ed8eea03SBarry Smith     sum7  = 0.0;
101226fbe8dcSKarl Rupp 
1013b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1014ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1015ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1016ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022ed8eea03SBarry Smith       jrow++;
1023ed8eea03SBarry Smith     }
1024ed8eea03SBarry Smith     y[7*i]   = sum1;
1025ed8eea03SBarry Smith     y[7*i+1] = sum2;
1026ed8eea03SBarry Smith     y[7*i+2] = sum3;
1027ed8eea03SBarry Smith     y[7*i+3] = sum4;
1028ed8eea03SBarry Smith     y[7*i+4] = sum5;
1029ed8eea03SBarry Smith     y[7*i+5] = sum6;
1030ed8eea03SBarry Smith     y[7*i+6] = sum7;
1031ed8eea03SBarry Smith   }
1032ed8eea03SBarry Smith 
10339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow));
10349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
10359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1036ed8eea03SBarry Smith   PetscFunctionReturn(0);
1037ed8eea03SBarry Smith }
1038ed8eea03SBarry Smith 
1039ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1040ed8eea03SBarry Smith {
1041ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1042ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1043d2840e09SBarry Smith   const PetscScalar *x,*v;
1044d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1045d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1046d2840e09SBarry Smith   PetscInt          n,i;
1047ed8eea03SBarry Smith 
1048ed8eea03SBarry Smith   PetscFunctionBegin;
10499566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
10509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
10519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1052ed8eea03SBarry Smith 
1053ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1054ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1055ed8eea03SBarry Smith     v      = a->a + a->i[i];
1056ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1057ed8eea03SBarry Smith     alpha1 = x[7*i];
1058ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1059ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1060ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1061ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1062ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1063ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1064ed8eea03SBarry Smith     while (n-->0) {
1065ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1066ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1067ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1068ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1069ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1070ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1071ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1072ed8eea03SBarry Smith       idx++; v++;
1073ed8eea03SBarry Smith     }
1074ed8eea03SBarry Smith   }
10759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
10769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
10779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1078ed8eea03SBarry Smith   PetscFunctionReturn(0);
1079ed8eea03SBarry Smith }
1080ed8eea03SBarry Smith 
1081ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1082ed8eea03SBarry Smith {
1083ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1084ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1085d2840e09SBarry Smith   const PetscScalar *x,*v;
1086d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1087d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1088ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1089ed8eea03SBarry Smith 
1090ed8eea03SBarry Smith   PetscFunctionBegin;
10919566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
10929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
10939566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1094ed8eea03SBarry Smith   idx  = a->j;
1095ed8eea03SBarry Smith   v    = a->a;
1096ed8eea03SBarry Smith   ii   = a->i;
1097ed8eea03SBarry Smith 
1098ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1099ed8eea03SBarry Smith     jrow = ii[i];
1100ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1101ed8eea03SBarry Smith     sum1 = 0.0;
1102ed8eea03SBarry Smith     sum2 = 0.0;
1103ed8eea03SBarry Smith     sum3 = 0.0;
1104ed8eea03SBarry Smith     sum4 = 0.0;
1105ed8eea03SBarry Smith     sum5 = 0.0;
1106ed8eea03SBarry Smith     sum6 = 0.0;
1107ed8eea03SBarry Smith     sum7 = 0.0;
1108ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1109ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1110ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1111ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1112ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1113ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1114ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1115ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1116ed8eea03SBarry Smith       jrow++;
1117ed8eea03SBarry Smith     }
1118ed8eea03SBarry Smith     y[7*i]   += sum1;
1119ed8eea03SBarry Smith     y[7*i+1] += sum2;
1120ed8eea03SBarry Smith     y[7*i+2] += sum3;
1121ed8eea03SBarry Smith     y[7*i+3] += sum4;
1122ed8eea03SBarry Smith     y[7*i+4] += sum5;
1123ed8eea03SBarry Smith     y[7*i+5] += sum6;
1124ed8eea03SBarry Smith     y[7*i+6] += sum7;
1125ed8eea03SBarry Smith   }
1126ed8eea03SBarry Smith 
11279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
11289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
11299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1130ed8eea03SBarry Smith   PetscFunctionReturn(0);
1131ed8eea03SBarry Smith }
1132ed8eea03SBarry Smith 
1133ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1134ed8eea03SBarry Smith {
1135ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1136ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1137d2840e09SBarry Smith   const PetscScalar *x,*v;
1138d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1139d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1140d2840e09SBarry Smith   PetscInt          n,i;
1141ed8eea03SBarry Smith 
1142ed8eea03SBarry Smith   PetscFunctionBegin;
11439566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
11449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
11459566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1146ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1147ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1148ed8eea03SBarry Smith     v      = a->a + a->i[i];
1149ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1150ed8eea03SBarry Smith     alpha1 = x[7*i];
1151ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1152ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1153ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1154ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1155ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1156ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1157ed8eea03SBarry Smith     while (n-->0) {
1158ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1159ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1160ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1161ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1162ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1163ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1164ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1165ed8eea03SBarry Smith       idx++; v++;
1166ed8eea03SBarry Smith     }
1167ed8eea03SBarry Smith   }
11689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0*a->nz));
11699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
11709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1171ed8eea03SBarry Smith   PetscFunctionReturn(0);
1172ed8eea03SBarry Smith }
1173ed8eea03SBarry Smith 
1174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
117566ed3db0SBarry Smith {
117666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
117766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1178d2840e09SBarry Smith   const PetscScalar *x,*v;
1179d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1180d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1181d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
118266ed3db0SBarry Smith 
118366ed3db0SBarry Smith   PetscFunctionBegin;
11849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
11859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
118666ed3db0SBarry Smith   idx  = a->j;
118766ed3db0SBarry Smith   v    = a->a;
118866ed3db0SBarry Smith   ii   = a->i;
118966ed3db0SBarry Smith 
119066ed3db0SBarry Smith   for (i=0; i<m; i++) {
119166ed3db0SBarry Smith     jrow  = ii[i];
119266ed3db0SBarry Smith     n     = ii[i+1] - jrow;
119366ed3db0SBarry Smith     sum1  = 0.0;
119466ed3db0SBarry Smith     sum2  = 0.0;
119566ed3db0SBarry Smith     sum3  = 0.0;
119666ed3db0SBarry Smith     sum4  = 0.0;
119766ed3db0SBarry Smith     sum5  = 0.0;
119866ed3db0SBarry Smith     sum6  = 0.0;
119966ed3db0SBarry Smith     sum7  = 0.0;
120066ed3db0SBarry Smith     sum8  = 0.0;
120126fbe8dcSKarl Rupp 
1202b3c51c66SMatthew Knepley     nonzerorow += (n>0);
120366ed3db0SBarry Smith     for (j=0; j<n; j++) {
120466ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
120566ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
120666ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
120766ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
120866ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
120966ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
121066ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
121166ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
121266ed3db0SBarry Smith       jrow++;
121366ed3db0SBarry Smith     }
121466ed3db0SBarry Smith     y[8*i]   = sum1;
121566ed3db0SBarry Smith     y[8*i+1] = sum2;
121666ed3db0SBarry Smith     y[8*i+2] = sum3;
121766ed3db0SBarry Smith     y[8*i+3] = sum4;
121866ed3db0SBarry Smith     y[8*i+4] = sum5;
121966ed3db0SBarry Smith     y[8*i+5] = sum6;
122066ed3db0SBarry Smith     y[8*i+6] = sum7;
122166ed3db0SBarry Smith     y[8*i+7] = sum8;
122266ed3db0SBarry Smith   }
122366ed3db0SBarry Smith 
12249566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow));
12259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
12269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
122766ed3db0SBarry Smith   PetscFunctionReturn(0);
122866ed3db0SBarry Smith }
122966ed3db0SBarry Smith 
1230dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
123166ed3db0SBarry Smith {
123266ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
123366ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1234d2840e09SBarry Smith   const PetscScalar *x,*v;
1235d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1236d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1237d2840e09SBarry Smith   PetscInt          n,i;
123866ed3db0SBarry Smith 
123966ed3db0SBarry Smith   PetscFunctionBegin;
12409566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
12419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
12429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1243bfec09a0SHong Zhang 
124466ed3db0SBarry Smith   for (i=0; i<m; i++) {
1245bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1246bfec09a0SHong Zhang     v      = a->a + a->i[i];
124766ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
124866ed3db0SBarry Smith     alpha1 = x[8*i];
124966ed3db0SBarry Smith     alpha2 = x[8*i+1];
125066ed3db0SBarry Smith     alpha3 = x[8*i+2];
125166ed3db0SBarry Smith     alpha4 = x[8*i+3];
125266ed3db0SBarry Smith     alpha5 = x[8*i+4];
125366ed3db0SBarry Smith     alpha6 = x[8*i+5];
125466ed3db0SBarry Smith     alpha7 = x[8*i+6];
125566ed3db0SBarry Smith     alpha8 = x[8*i+7];
125666ed3db0SBarry Smith     while (n-->0) {
125766ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
125866ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
125966ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
126066ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
126166ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
126266ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
126366ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
126466ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
126566ed3db0SBarry Smith       idx++; v++;
126666ed3db0SBarry Smith     }
126766ed3db0SBarry Smith   }
12689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
12699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
12709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
127166ed3db0SBarry Smith   PetscFunctionReturn(0);
127266ed3db0SBarry Smith }
127366ed3db0SBarry Smith 
1274dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
127566ed3db0SBarry Smith {
127666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
127766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1278d2840e09SBarry Smith   const PetscScalar *x,*v;
1279d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1280d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1281b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
128266ed3db0SBarry Smith 
128366ed3db0SBarry Smith   PetscFunctionBegin;
12849566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
12859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
12869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
128766ed3db0SBarry Smith   idx  = a->j;
128866ed3db0SBarry Smith   v    = a->a;
128966ed3db0SBarry Smith   ii   = a->i;
129066ed3db0SBarry Smith 
129166ed3db0SBarry Smith   for (i=0; i<m; i++) {
129266ed3db0SBarry Smith     jrow = ii[i];
129366ed3db0SBarry Smith     n    = ii[i+1] - jrow;
129466ed3db0SBarry Smith     sum1 = 0.0;
129566ed3db0SBarry Smith     sum2 = 0.0;
129666ed3db0SBarry Smith     sum3 = 0.0;
129766ed3db0SBarry Smith     sum4 = 0.0;
129866ed3db0SBarry Smith     sum5 = 0.0;
129966ed3db0SBarry Smith     sum6 = 0.0;
130066ed3db0SBarry Smith     sum7 = 0.0;
130166ed3db0SBarry Smith     sum8 = 0.0;
130266ed3db0SBarry Smith     for (j=0; j<n; j++) {
130366ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
130466ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
130566ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
130666ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
130766ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
130866ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
130966ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
131066ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
131166ed3db0SBarry Smith       jrow++;
131266ed3db0SBarry Smith     }
131366ed3db0SBarry Smith     y[8*i]   += sum1;
131466ed3db0SBarry Smith     y[8*i+1] += sum2;
131566ed3db0SBarry Smith     y[8*i+2] += sum3;
131666ed3db0SBarry Smith     y[8*i+3] += sum4;
131766ed3db0SBarry Smith     y[8*i+4] += sum5;
131866ed3db0SBarry Smith     y[8*i+5] += sum6;
131966ed3db0SBarry Smith     y[8*i+6] += sum7;
132066ed3db0SBarry Smith     y[8*i+7] += sum8;
132166ed3db0SBarry Smith   }
132266ed3db0SBarry Smith 
13239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
13249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
13259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
132666ed3db0SBarry Smith   PetscFunctionReturn(0);
132766ed3db0SBarry Smith }
132866ed3db0SBarry Smith 
1329dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
133066ed3db0SBarry Smith {
133166ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
133266ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1333d2840e09SBarry Smith   const PetscScalar *x,*v;
1334d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1336d2840e09SBarry Smith   PetscInt          n,i;
133766ed3db0SBarry Smith 
133866ed3db0SBarry Smith   PetscFunctionBegin;
13399566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
13409566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
13419566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
134266ed3db0SBarry Smith   for (i=0; i<m; i++) {
1343bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1344bfec09a0SHong Zhang     v      = a->a + a->i[i];
134566ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
134666ed3db0SBarry Smith     alpha1 = x[8*i];
134766ed3db0SBarry Smith     alpha2 = x[8*i+1];
134866ed3db0SBarry Smith     alpha3 = x[8*i+2];
134966ed3db0SBarry Smith     alpha4 = x[8*i+3];
135066ed3db0SBarry Smith     alpha5 = x[8*i+4];
135166ed3db0SBarry Smith     alpha6 = x[8*i+5];
135266ed3db0SBarry Smith     alpha7 = x[8*i+6];
135366ed3db0SBarry Smith     alpha8 = x[8*i+7];
135466ed3db0SBarry Smith     while (n-->0) {
135566ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
135666ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
135766ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
135866ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
135966ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
136066ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
136166ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
136266ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
136366ed3db0SBarry Smith       idx++; v++;
136466ed3db0SBarry Smith     }
136566ed3db0SBarry Smith   }
13669566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0*a->nz));
13679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
13689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
13692f7816d4SBarry Smith   PetscFunctionReturn(0);
13702f7816d4SBarry Smith }
13712f7816d4SBarry Smith 
13722b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1373dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
13742b692628SMatthew Knepley {
13752b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
13762b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1377d2840e09SBarry Smith   const PetscScalar *x,*v;
1378d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1379d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1380d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
13812b692628SMatthew Knepley 
13822b692628SMatthew Knepley   PetscFunctionBegin;
13839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
13849566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
13852b692628SMatthew Knepley   idx  = a->j;
13862b692628SMatthew Knepley   v    = a->a;
13872b692628SMatthew Knepley   ii   = a->i;
13882b692628SMatthew Knepley 
13892b692628SMatthew Knepley   for (i=0; i<m; i++) {
13902b692628SMatthew Knepley     jrow  = ii[i];
13912b692628SMatthew Knepley     n     = ii[i+1] - jrow;
13922b692628SMatthew Knepley     sum1  = 0.0;
13932b692628SMatthew Knepley     sum2  = 0.0;
13942b692628SMatthew Knepley     sum3  = 0.0;
13952b692628SMatthew Knepley     sum4  = 0.0;
13962b692628SMatthew Knepley     sum5  = 0.0;
13972b692628SMatthew Knepley     sum6  = 0.0;
13982b692628SMatthew Knepley     sum7  = 0.0;
13992b692628SMatthew Knepley     sum8  = 0.0;
14002b692628SMatthew Knepley     sum9  = 0.0;
140126fbe8dcSKarl Rupp 
1402b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14032b692628SMatthew Knepley     for (j=0; j<n; j++) {
14042b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14052b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14062b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14072b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14082b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14092b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14102b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14112b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14122b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14132b692628SMatthew Knepley       jrow++;
14142b692628SMatthew Knepley     }
14152b692628SMatthew Knepley     y[9*i]   = sum1;
14162b692628SMatthew Knepley     y[9*i+1] = sum2;
14172b692628SMatthew Knepley     y[9*i+2] = sum3;
14182b692628SMatthew Knepley     y[9*i+3] = sum4;
14192b692628SMatthew Knepley     y[9*i+4] = sum5;
14202b692628SMatthew Knepley     y[9*i+5] = sum6;
14212b692628SMatthew Knepley     y[9*i+6] = sum7;
14222b692628SMatthew Knepley     y[9*i+7] = sum8;
14232b692628SMatthew Knepley     y[9*i+8] = sum9;
14242b692628SMatthew Knepley   }
14252b692628SMatthew Knepley 
14269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow));
14279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
14289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
14292b692628SMatthew Knepley   PetscFunctionReturn(0);
14302b692628SMatthew Knepley }
14312b692628SMatthew Knepley 
1432b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1433b9cda22cSBarry Smith 
1434dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14352b692628SMatthew Knepley {
14362b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14372b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1438d2840e09SBarry Smith   const PetscScalar *x,*v;
1439d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1440d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1441d2840e09SBarry Smith   PetscInt          n,i;
14422b692628SMatthew Knepley 
14432b692628SMatthew Knepley   PetscFunctionBegin;
14449566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
14459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
14469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
14472b692628SMatthew Knepley 
14482b692628SMatthew Knepley   for (i=0; i<m; i++) {
14492b692628SMatthew Knepley     idx    = a->j + a->i[i];
14502b692628SMatthew Knepley     v      = a->a + a->i[i];
14512b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14522b692628SMatthew Knepley     alpha1 = x[9*i];
14532b692628SMatthew Knepley     alpha2 = x[9*i+1];
14542b692628SMatthew Knepley     alpha3 = x[9*i+2];
14552b692628SMatthew Knepley     alpha4 = x[9*i+3];
14562b692628SMatthew Knepley     alpha5 = x[9*i+4];
14572b692628SMatthew Knepley     alpha6 = x[9*i+5];
14582b692628SMatthew Knepley     alpha7 = x[9*i+6];
14592b692628SMatthew Knepley     alpha8 = x[9*i+7];
14602f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14612b692628SMatthew Knepley     while (n-->0) {
14622b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14632b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14642b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
14652b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
14662b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
14672b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
14682b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
14692b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
14702b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
14712b692628SMatthew Knepley       idx++; v++;
14722b692628SMatthew Knepley     }
14732b692628SMatthew Knepley   }
14749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
14759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
14769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
14772b692628SMatthew Knepley   PetscFunctionReturn(0);
14782b692628SMatthew Knepley }
14792b692628SMatthew Knepley 
1480dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
14812b692628SMatthew Knepley {
14822b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14832b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1484d2840e09SBarry Smith   const PetscScalar *x,*v;
1485d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1486d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1487b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
14882b692628SMatthew Knepley 
14892b692628SMatthew Knepley   PetscFunctionBegin;
14909566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
14919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
14929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
14932b692628SMatthew Knepley   idx  = a->j;
14942b692628SMatthew Knepley   v    = a->a;
14952b692628SMatthew Knepley   ii   = a->i;
14962b692628SMatthew Knepley 
14972b692628SMatthew Knepley   for (i=0; i<m; i++) {
14982b692628SMatthew Knepley     jrow = ii[i];
14992b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15002b692628SMatthew Knepley     sum1 = 0.0;
15012b692628SMatthew Knepley     sum2 = 0.0;
15022b692628SMatthew Knepley     sum3 = 0.0;
15032b692628SMatthew Knepley     sum4 = 0.0;
15042b692628SMatthew Knepley     sum5 = 0.0;
15052b692628SMatthew Knepley     sum6 = 0.0;
15062b692628SMatthew Knepley     sum7 = 0.0;
15072b692628SMatthew Knepley     sum8 = 0.0;
15082b692628SMatthew Knepley     sum9 = 0.0;
15092b692628SMatthew Knepley     for (j=0; j<n; j++) {
15102b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15112b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15122b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15132b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15142b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15152b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15162b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15172b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15182b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15192b692628SMatthew Knepley       jrow++;
15202b692628SMatthew Knepley     }
15212b692628SMatthew Knepley     y[9*i]   += sum1;
15222b692628SMatthew Knepley     y[9*i+1] += sum2;
15232b692628SMatthew Knepley     y[9*i+2] += sum3;
15242b692628SMatthew Knepley     y[9*i+3] += sum4;
15252b692628SMatthew Knepley     y[9*i+4] += sum5;
15262b692628SMatthew Knepley     y[9*i+5] += sum6;
15272b692628SMatthew Knepley     y[9*i+6] += sum7;
15282b692628SMatthew Knepley     y[9*i+7] += sum8;
15292b692628SMatthew Knepley     y[9*i+8] += sum9;
15302b692628SMatthew Knepley   }
15312b692628SMatthew Knepley 
15329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
15339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
15349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
15352b692628SMatthew Knepley   PetscFunctionReturn(0);
15362b692628SMatthew Knepley }
15372b692628SMatthew Knepley 
1538dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15392b692628SMatthew Knepley {
15402b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15412b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1542d2840e09SBarry Smith   const PetscScalar *x,*v;
1543d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1544d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1545d2840e09SBarry Smith   PetscInt          n,i;
15462b692628SMatthew Knepley 
15472b692628SMatthew Knepley   PetscFunctionBegin;
15489566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
15499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
15509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
15512b692628SMatthew Knepley   for (i=0; i<m; i++) {
15522b692628SMatthew Knepley     idx    = a->j + a->i[i];
15532b692628SMatthew Knepley     v      = a->a + a->i[i];
15542b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15552b692628SMatthew Knepley     alpha1 = x[9*i];
15562b692628SMatthew Knepley     alpha2 = x[9*i+1];
15572b692628SMatthew Knepley     alpha3 = x[9*i+2];
15582b692628SMatthew Knepley     alpha4 = x[9*i+3];
15592b692628SMatthew Knepley     alpha5 = x[9*i+4];
15602b692628SMatthew Knepley     alpha6 = x[9*i+5];
15612b692628SMatthew Knepley     alpha7 = x[9*i+6];
15622b692628SMatthew Knepley     alpha8 = x[9*i+7];
15632b692628SMatthew Knepley     alpha9 = x[9*i+8];
15642b692628SMatthew Knepley     while (n-->0) {
15652b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15662b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15672b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15682b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15692b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15702b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15712b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15722b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15732b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15742b692628SMatthew Knepley       idx++; v++;
15752b692628SMatthew Knepley     }
15762b692628SMatthew Knepley   }
15779566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0*a->nz));
15789566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
15799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
15802b692628SMatthew Knepley   PetscFunctionReturn(0);
15812b692628SMatthew Knepley }
1582b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1583b9cda22cSBarry Smith {
1584b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1585b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1586d2840e09SBarry Smith   const PetscScalar *x,*v;
1587d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1588d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1589d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1590b9cda22cSBarry Smith 
1591b9cda22cSBarry Smith   PetscFunctionBegin;
15929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
15939566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1594b9cda22cSBarry Smith   idx  = a->j;
1595b9cda22cSBarry Smith   v    = a->a;
1596b9cda22cSBarry Smith   ii   = a->i;
1597b9cda22cSBarry Smith 
1598b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1599b9cda22cSBarry Smith     jrow  = ii[i];
1600b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1601b9cda22cSBarry Smith     sum1  = 0.0;
1602b9cda22cSBarry Smith     sum2  = 0.0;
1603b9cda22cSBarry Smith     sum3  = 0.0;
1604b9cda22cSBarry Smith     sum4  = 0.0;
1605b9cda22cSBarry Smith     sum5  = 0.0;
1606b9cda22cSBarry Smith     sum6  = 0.0;
1607b9cda22cSBarry Smith     sum7  = 0.0;
1608b9cda22cSBarry Smith     sum8  = 0.0;
1609b9cda22cSBarry Smith     sum9  = 0.0;
1610b9cda22cSBarry Smith     sum10 = 0.0;
161126fbe8dcSKarl Rupp 
1612b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1613b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1614b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1615b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1616b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1617b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1618b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1619b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1620b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1621b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1622b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1623b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1624b9cda22cSBarry Smith       jrow++;
1625b9cda22cSBarry Smith     }
1626b9cda22cSBarry Smith     y[10*i]   = sum1;
1627b9cda22cSBarry Smith     y[10*i+1] = sum2;
1628b9cda22cSBarry Smith     y[10*i+2] = sum3;
1629b9cda22cSBarry Smith     y[10*i+3] = sum4;
1630b9cda22cSBarry Smith     y[10*i+4] = sum5;
1631b9cda22cSBarry Smith     y[10*i+5] = sum6;
1632b9cda22cSBarry Smith     y[10*i+6] = sum7;
1633b9cda22cSBarry Smith     y[10*i+7] = sum8;
1634b9cda22cSBarry Smith     y[10*i+8] = sum9;
1635b9cda22cSBarry Smith     y[10*i+9] = sum10;
1636b9cda22cSBarry Smith   }
1637b9cda22cSBarry Smith 
16389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow));
16399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
16409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1641b9cda22cSBarry Smith   PetscFunctionReturn(0);
1642b9cda22cSBarry Smith }
1643b9cda22cSBarry Smith 
1644b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1645b9cda22cSBarry Smith {
1646b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1647b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1648d2840e09SBarry Smith   const PetscScalar *x,*v;
1649d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1650d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1651b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1652b9cda22cSBarry Smith 
1653b9cda22cSBarry Smith   PetscFunctionBegin;
16549566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
16559566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
16569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1657b9cda22cSBarry Smith   idx  = a->j;
1658b9cda22cSBarry Smith   v    = a->a;
1659b9cda22cSBarry Smith   ii   = a->i;
1660b9cda22cSBarry Smith 
1661b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1662b9cda22cSBarry Smith     jrow  = ii[i];
1663b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1664b9cda22cSBarry Smith     sum1  = 0.0;
1665b9cda22cSBarry Smith     sum2  = 0.0;
1666b9cda22cSBarry Smith     sum3  = 0.0;
1667b9cda22cSBarry Smith     sum4  = 0.0;
1668b9cda22cSBarry Smith     sum5  = 0.0;
1669b9cda22cSBarry Smith     sum6  = 0.0;
1670b9cda22cSBarry Smith     sum7  = 0.0;
1671b9cda22cSBarry Smith     sum8  = 0.0;
1672b9cda22cSBarry Smith     sum9  = 0.0;
1673b9cda22cSBarry Smith     sum10 = 0.0;
1674b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1675b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1676b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1677b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1678b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1679b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1680b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1681b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1682b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1683b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1684b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1685b9cda22cSBarry Smith       jrow++;
1686b9cda22cSBarry Smith     }
1687b9cda22cSBarry Smith     y[10*i]   += sum1;
1688b9cda22cSBarry Smith     y[10*i+1] += sum2;
1689b9cda22cSBarry Smith     y[10*i+2] += sum3;
1690b9cda22cSBarry Smith     y[10*i+3] += sum4;
1691b9cda22cSBarry Smith     y[10*i+4] += sum5;
1692b9cda22cSBarry Smith     y[10*i+5] += sum6;
1693b9cda22cSBarry Smith     y[10*i+6] += sum7;
1694b9cda22cSBarry Smith     y[10*i+7] += sum8;
1695b9cda22cSBarry Smith     y[10*i+8] += sum9;
1696b9cda22cSBarry Smith     y[10*i+9] += sum10;
1697b9cda22cSBarry Smith   }
1698b9cda22cSBarry Smith 
16999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1702b9cda22cSBarry Smith   PetscFunctionReturn(0);
1703b9cda22cSBarry Smith }
1704b9cda22cSBarry Smith 
1705b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1706b9cda22cSBarry Smith {
1707b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1708b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1709d2840e09SBarry Smith   const PetscScalar *x,*v;
1710d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1711d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1712d2840e09SBarry Smith   PetscInt          n,i;
1713b9cda22cSBarry Smith 
1714b9cda22cSBarry Smith   PetscFunctionBegin;
17159566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
17169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
17179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1718b9cda22cSBarry Smith 
1719b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1720b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1721b9cda22cSBarry Smith     v       = a->a + a->i[i];
1722b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1723e29fdc22SBarry Smith     alpha1  = x[10*i];
1724e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1725e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1726e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1727e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1728e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1729e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1730e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1731e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1732b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1733b9cda22cSBarry Smith     while (n-->0) {
1734e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1735e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1736e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1737e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1738e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1739e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1740e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1741e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1742e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1743b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1744b9cda22cSBarry Smith       idx++; v++;
1745b9cda22cSBarry Smith     }
1746b9cda22cSBarry Smith   }
17479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1750b9cda22cSBarry Smith   PetscFunctionReturn(0);
1751b9cda22cSBarry Smith }
1752b9cda22cSBarry Smith 
1753b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1754b9cda22cSBarry Smith {
1755b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1756b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1757d2840e09SBarry Smith   const PetscScalar *x,*v;
1758d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1759d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1760d2840e09SBarry Smith   PetscInt          n,i;
1761b9cda22cSBarry Smith 
1762b9cda22cSBarry Smith   PetscFunctionBegin;
17639566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
17649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
17659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1766b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1767b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1768b9cda22cSBarry Smith     v       = a->a + a->i[i];
1769b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1770b9cda22cSBarry Smith     alpha1  = x[10*i];
1771b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1772b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1773b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1774b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1775b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1776b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1777b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1778b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1779b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1780b9cda22cSBarry Smith     while (n-->0) {
1781b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1782b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1783b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1784b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1785b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1786b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1787b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1788b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1789b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1790b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1791b9cda22cSBarry Smith       idx++; v++;
1792b9cda22cSBarry Smith     }
1793b9cda22cSBarry Smith   }
17949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0*a->nz));
17959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
17969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
1797b9cda22cSBarry Smith   PetscFunctionReturn(0);
1798b9cda22cSBarry Smith }
1799b9cda22cSBarry Smith 
18002f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1801dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1802dbdd7285SBarry Smith {
1803dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1804dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1805d2840e09SBarry Smith   const PetscScalar *x,*v;
1806d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1807d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1808d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1809dbdd7285SBarry Smith 
1810dbdd7285SBarry Smith   PetscFunctionBegin;
18119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
18129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1813dbdd7285SBarry Smith   idx  = a->j;
1814dbdd7285SBarry Smith   v    = a->a;
1815dbdd7285SBarry Smith   ii   = a->i;
1816dbdd7285SBarry Smith 
1817dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1818dbdd7285SBarry Smith     jrow  = ii[i];
1819dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1820dbdd7285SBarry Smith     sum1  = 0.0;
1821dbdd7285SBarry Smith     sum2  = 0.0;
1822dbdd7285SBarry Smith     sum3  = 0.0;
1823dbdd7285SBarry Smith     sum4  = 0.0;
1824dbdd7285SBarry Smith     sum5  = 0.0;
1825dbdd7285SBarry Smith     sum6  = 0.0;
1826dbdd7285SBarry Smith     sum7  = 0.0;
1827dbdd7285SBarry Smith     sum8  = 0.0;
1828dbdd7285SBarry Smith     sum9  = 0.0;
1829dbdd7285SBarry Smith     sum10 = 0.0;
1830dbdd7285SBarry Smith     sum11 = 0.0;
183126fbe8dcSKarl Rupp 
1832dbdd7285SBarry Smith     nonzerorow += (n>0);
1833dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1834dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1835dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1836dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1837dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1838dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1839dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1840dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1841dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1842dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1843dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1844dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1845dbdd7285SBarry Smith       jrow++;
1846dbdd7285SBarry Smith     }
1847dbdd7285SBarry Smith     y[11*i]    = sum1;
1848dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1849dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1850dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1851dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1852dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1853dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1854dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1855dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1856dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1857dbdd7285SBarry Smith     y[11*i+10] = sum11;
1858dbdd7285SBarry Smith   }
1859dbdd7285SBarry Smith 
18609566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow));
18619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
18629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1863dbdd7285SBarry Smith   PetscFunctionReturn(0);
1864dbdd7285SBarry Smith }
1865dbdd7285SBarry Smith 
1866dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1867dbdd7285SBarry Smith {
1868dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1869dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1870d2840e09SBarry Smith   const PetscScalar *x,*v;
1871d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1872d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1873dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1874dbdd7285SBarry Smith 
1875dbdd7285SBarry Smith   PetscFunctionBegin;
18769566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
18779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
18789566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1879dbdd7285SBarry Smith   idx  = a->j;
1880dbdd7285SBarry Smith   v    = a->a;
1881dbdd7285SBarry Smith   ii   = a->i;
1882dbdd7285SBarry Smith 
1883dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1884dbdd7285SBarry Smith     jrow  = ii[i];
1885dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1886dbdd7285SBarry Smith     sum1  = 0.0;
1887dbdd7285SBarry Smith     sum2  = 0.0;
1888dbdd7285SBarry Smith     sum3  = 0.0;
1889dbdd7285SBarry Smith     sum4  = 0.0;
1890dbdd7285SBarry Smith     sum5  = 0.0;
1891dbdd7285SBarry Smith     sum6  = 0.0;
1892dbdd7285SBarry Smith     sum7  = 0.0;
1893dbdd7285SBarry Smith     sum8  = 0.0;
1894dbdd7285SBarry Smith     sum9  = 0.0;
1895dbdd7285SBarry Smith     sum10 = 0.0;
1896dbdd7285SBarry Smith     sum11 = 0.0;
1897dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1898dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1899dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1900dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1901dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1902dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1903dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1904dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1905dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1906dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1907dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1908dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1909dbdd7285SBarry Smith       jrow++;
1910dbdd7285SBarry Smith     }
1911dbdd7285SBarry Smith     y[11*i]    += sum1;
1912dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1913dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1914dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1915dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1916dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1917dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1918dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1919dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1920dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1921dbdd7285SBarry Smith     y[11*i+10] += sum11;
1922dbdd7285SBarry Smith   }
1923dbdd7285SBarry Smith 
19249566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
19259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
19269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1927dbdd7285SBarry Smith   PetscFunctionReturn(0);
1928dbdd7285SBarry Smith }
1929dbdd7285SBarry Smith 
1930dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1931dbdd7285SBarry Smith {
1932dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1933dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1934d2840e09SBarry Smith   const PetscScalar *x,*v;
1935d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1936d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1937d2840e09SBarry Smith   PetscInt          n,i;
1938dbdd7285SBarry Smith 
1939dbdd7285SBarry Smith   PetscFunctionBegin;
19409566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
19419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
19429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
1943dbdd7285SBarry Smith 
1944dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1945dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1946dbdd7285SBarry Smith     v       = a->a + a->i[i];
1947dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1948dbdd7285SBarry Smith     alpha1  = x[11*i];
1949dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1950dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1951dbdd7285SBarry Smith     alpha4  = x[11*i+3];
1952dbdd7285SBarry Smith     alpha5  = x[11*i+4];
1953dbdd7285SBarry Smith     alpha6  = x[11*i+5];
1954dbdd7285SBarry Smith     alpha7  = x[11*i+6];
1955dbdd7285SBarry Smith     alpha8  = x[11*i+7];
1956dbdd7285SBarry Smith     alpha9  = x[11*i+8];
1957dbdd7285SBarry Smith     alpha10 = x[11*i+9];
1958dbdd7285SBarry Smith     alpha11 = x[11*i+10];
1959dbdd7285SBarry Smith     while (n-->0) {
1960dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
1961dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
1962dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
1963dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
1964dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
1965dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
1966dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
1967dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
1968dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
1969dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
1970dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
1971dbdd7285SBarry Smith       idx++; v++;
1972dbdd7285SBarry Smith     }
1973dbdd7285SBarry Smith   }
19749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
19759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
19769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
1977dbdd7285SBarry Smith   PetscFunctionReturn(0);
1978dbdd7285SBarry Smith }
1979dbdd7285SBarry Smith 
1980dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1981dbdd7285SBarry Smith {
1982dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1983dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1984d2840e09SBarry Smith   const PetscScalar *x,*v;
1985d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1986d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1987d2840e09SBarry Smith   PetscInt          n,i;
1988dbdd7285SBarry Smith 
1989dbdd7285SBarry Smith   PetscFunctionBegin;
19909566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
19919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
19929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
1993dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1994dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1995dbdd7285SBarry Smith     v       = a->a + a->i[i];
1996dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1997dbdd7285SBarry Smith     alpha1  = x[11*i];
1998dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1999dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2000dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2001dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2002dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2003dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2004dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2005dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2006dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2007dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2008dbdd7285SBarry Smith     while (n-->0) {
2009dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2010dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2017dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2018dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2019dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2020dbdd7285SBarry Smith       idx++; v++;
2021dbdd7285SBarry Smith     }
2022dbdd7285SBarry Smith   }
20239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0*a->nz));
20249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
20259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2026dbdd7285SBarry Smith   PetscFunctionReturn(0);
2027dbdd7285SBarry Smith }
2028dbdd7285SBarry Smith 
2029dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2030dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20312f7816d4SBarry Smith {
20322f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20332f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2034d2840e09SBarry Smith   const PetscScalar *x,*v;
2035d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20362f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2037d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2038d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20392f7816d4SBarry Smith 
20402f7816d4SBarry Smith   PetscFunctionBegin;
20419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
20429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
20432f7816d4SBarry Smith   idx  = a->j;
20442f7816d4SBarry Smith   v    = a->a;
20452f7816d4SBarry Smith   ii   = a->i;
20462f7816d4SBarry Smith 
20472f7816d4SBarry Smith   for (i=0; i<m; i++) {
20482f7816d4SBarry Smith     jrow  = ii[i];
20492f7816d4SBarry Smith     n     = ii[i+1] - jrow;
20502f7816d4SBarry Smith     sum1  = 0.0;
20512f7816d4SBarry Smith     sum2  = 0.0;
20522f7816d4SBarry Smith     sum3  = 0.0;
20532f7816d4SBarry Smith     sum4  = 0.0;
20542f7816d4SBarry Smith     sum5  = 0.0;
20552f7816d4SBarry Smith     sum6  = 0.0;
20562f7816d4SBarry Smith     sum7  = 0.0;
20572f7816d4SBarry Smith     sum8  = 0.0;
20582f7816d4SBarry Smith     sum9  = 0.0;
20592f7816d4SBarry Smith     sum10 = 0.0;
20602f7816d4SBarry Smith     sum11 = 0.0;
20612f7816d4SBarry Smith     sum12 = 0.0;
20622f7816d4SBarry Smith     sum13 = 0.0;
20632f7816d4SBarry Smith     sum14 = 0.0;
20642f7816d4SBarry Smith     sum15 = 0.0;
20652f7816d4SBarry Smith     sum16 = 0.0;
206626fbe8dcSKarl Rupp 
2067b3c51c66SMatthew Knepley     nonzerorow += (n>0);
20682f7816d4SBarry Smith     for (j=0; j<n; j++) {
20692f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
20702f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
20712f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
20722f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
20732f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
20742f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
20752f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
20762f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
20772f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
20782f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
20792f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
20802f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
20812f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
20822f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
20832f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
20842f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
20852f7816d4SBarry Smith       jrow++;
20862f7816d4SBarry Smith     }
20872f7816d4SBarry Smith     y[16*i]    = sum1;
20882f7816d4SBarry Smith     y[16*i+1]  = sum2;
20892f7816d4SBarry Smith     y[16*i+2]  = sum3;
20902f7816d4SBarry Smith     y[16*i+3]  = sum4;
20912f7816d4SBarry Smith     y[16*i+4]  = sum5;
20922f7816d4SBarry Smith     y[16*i+5]  = sum6;
20932f7816d4SBarry Smith     y[16*i+6]  = sum7;
20942f7816d4SBarry Smith     y[16*i+7]  = sum8;
20952f7816d4SBarry Smith     y[16*i+8]  = sum9;
20962f7816d4SBarry Smith     y[16*i+9]  = sum10;
20972f7816d4SBarry Smith     y[16*i+10] = sum11;
20982f7816d4SBarry Smith     y[16*i+11] = sum12;
20992f7816d4SBarry Smith     y[16*i+12] = sum13;
21002f7816d4SBarry Smith     y[16*i+13] = sum14;
21012f7816d4SBarry Smith     y[16*i+14] = sum15;
21022f7816d4SBarry Smith     y[16*i+15] = sum16;
21032f7816d4SBarry Smith   }
21042f7816d4SBarry Smith 
21059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow));
21069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
21079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
21082f7816d4SBarry Smith   PetscFunctionReturn(0);
21092f7816d4SBarry Smith }
21102f7816d4SBarry Smith 
2111dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21122f7816d4SBarry Smith {
21132f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21142f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2115d2840e09SBarry Smith   const PetscScalar *x,*v;
2116d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21172f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2118d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2119d2840e09SBarry Smith   PetscInt          n,i;
21202f7816d4SBarry Smith 
21212f7816d4SBarry Smith   PetscFunctionBegin;
21229566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
21239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
21249566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2125bfec09a0SHong Zhang 
21262f7816d4SBarry Smith   for (i=0; i<m; i++) {
2127bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2128bfec09a0SHong Zhang     v       = a->a + a->i[i];
21292f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21302f7816d4SBarry Smith     alpha1  = x[16*i];
21312f7816d4SBarry Smith     alpha2  = x[16*i+1];
21322f7816d4SBarry Smith     alpha3  = x[16*i+2];
21332f7816d4SBarry Smith     alpha4  = x[16*i+3];
21342f7816d4SBarry Smith     alpha5  = x[16*i+4];
21352f7816d4SBarry Smith     alpha6  = x[16*i+5];
21362f7816d4SBarry Smith     alpha7  = x[16*i+6];
21372f7816d4SBarry Smith     alpha8  = x[16*i+7];
21382f7816d4SBarry Smith     alpha9  = x[16*i+8];
21392f7816d4SBarry Smith     alpha10 = x[16*i+9];
21402f7816d4SBarry Smith     alpha11 = x[16*i+10];
21412f7816d4SBarry Smith     alpha12 = x[16*i+11];
21422f7816d4SBarry Smith     alpha13 = x[16*i+12];
21432f7816d4SBarry Smith     alpha14 = x[16*i+13];
21442f7816d4SBarry Smith     alpha15 = x[16*i+14];
21452f7816d4SBarry Smith     alpha16 = x[16*i+15];
21462f7816d4SBarry Smith     while (n-->0) {
21472f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
21482f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
21492f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
21502f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
21512f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
21522f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
21532f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
21542f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
21552f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
21562f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
21572f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
21582f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
21592f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
21602f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
21612f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
21622f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
21632f7816d4SBarry Smith       idx++; v++;
21642f7816d4SBarry Smith     }
21652f7816d4SBarry Smith   }
21669566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
21679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
21689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
21692f7816d4SBarry Smith   PetscFunctionReturn(0);
21702f7816d4SBarry Smith }
21712f7816d4SBarry Smith 
2172dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
21732f7816d4SBarry Smith {
21742f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21752f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2176d2840e09SBarry Smith   const PetscScalar *x,*v;
2177d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21782f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2179d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2180b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
21812f7816d4SBarry Smith 
21822f7816d4SBarry Smith   PetscFunctionBegin;
21839566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
21849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
21859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
21862f7816d4SBarry Smith   idx  = a->j;
21872f7816d4SBarry Smith   v    = a->a;
21882f7816d4SBarry Smith   ii   = a->i;
21892f7816d4SBarry Smith 
21902f7816d4SBarry Smith   for (i=0; i<m; i++) {
21912f7816d4SBarry Smith     jrow  = ii[i];
21922f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21932f7816d4SBarry Smith     sum1  = 0.0;
21942f7816d4SBarry Smith     sum2  = 0.0;
21952f7816d4SBarry Smith     sum3  = 0.0;
21962f7816d4SBarry Smith     sum4  = 0.0;
21972f7816d4SBarry Smith     sum5  = 0.0;
21982f7816d4SBarry Smith     sum6  = 0.0;
21992f7816d4SBarry Smith     sum7  = 0.0;
22002f7816d4SBarry Smith     sum8  = 0.0;
22012f7816d4SBarry Smith     sum9  = 0.0;
22022f7816d4SBarry Smith     sum10 = 0.0;
22032f7816d4SBarry Smith     sum11 = 0.0;
22042f7816d4SBarry Smith     sum12 = 0.0;
22052f7816d4SBarry Smith     sum13 = 0.0;
22062f7816d4SBarry Smith     sum14 = 0.0;
22072f7816d4SBarry Smith     sum15 = 0.0;
22082f7816d4SBarry Smith     sum16 = 0.0;
22092f7816d4SBarry Smith     for (j=0; j<n; j++) {
22102f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22112f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22122f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22132f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22142f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22152f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22162f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22172f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22182f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22192f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22202f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22212f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22222f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22232f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22242f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22252f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22262f7816d4SBarry Smith       jrow++;
22272f7816d4SBarry Smith     }
22282f7816d4SBarry Smith     y[16*i]    += sum1;
22292f7816d4SBarry Smith     y[16*i+1]  += sum2;
22302f7816d4SBarry Smith     y[16*i+2]  += sum3;
22312f7816d4SBarry Smith     y[16*i+3]  += sum4;
22322f7816d4SBarry Smith     y[16*i+4]  += sum5;
22332f7816d4SBarry Smith     y[16*i+5]  += sum6;
22342f7816d4SBarry Smith     y[16*i+6]  += sum7;
22352f7816d4SBarry Smith     y[16*i+7]  += sum8;
22362f7816d4SBarry Smith     y[16*i+8]  += sum9;
22372f7816d4SBarry Smith     y[16*i+9]  += sum10;
22382f7816d4SBarry Smith     y[16*i+10] += sum11;
22392f7816d4SBarry Smith     y[16*i+11] += sum12;
22402f7816d4SBarry Smith     y[16*i+12] += sum13;
22412f7816d4SBarry Smith     y[16*i+13] += sum14;
22422f7816d4SBarry Smith     y[16*i+14] += sum15;
22432f7816d4SBarry Smith     y[16*i+15] += sum16;
22442f7816d4SBarry Smith   }
22452f7816d4SBarry Smith 
22469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
22479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
22489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
22492f7816d4SBarry Smith   PetscFunctionReturn(0);
22502f7816d4SBarry Smith }
22512f7816d4SBarry Smith 
2252dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22532f7816d4SBarry Smith {
22542f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22552f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2256d2840e09SBarry Smith   const PetscScalar *x,*v;
2257d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22582f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2259d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2260d2840e09SBarry Smith   PetscInt          n,i;
22612f7816d4SBarry Smith 
22622f7816d4SBarry Smith   PetscFunctionBegin;
22639566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
22649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
22659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
22662f7816d4SBarry Smith   for (i=0; i<m; i++) {
2267bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2268bfec09a0SHong Zhang     v       = a->a + a->i[i];
22692f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
22702f7816d4SBarry Smith     alpha1  = x[16*i];
22712f7816d4SBarry Smith     alpha2  = x[16*i+1];
22722f7816d4SBarry Smith     alpha3  = x[16*i+2];
22732f7816d4SBarry Smith     alpha4  = x[16*i+3];
22742f7816d4SBarry Smith     alpha5  = x[16*i+4];
22752f7816d4SBarry Smith     alpha6  = x[16*i+5];
22762f7816d4SBarry Smith     alpha7  = x[16*i+6];
22772f7816d4SBarry Smith     alpha8  = x[16*i+7];
22782f7816d4SBarry Smith     alpha9  = x[16*i+8];
22792f7816d4SBarry Smith     alpha10 = x[16*i+9];
22802f7816d4SBarry Smith     alpha11 = x[16*i+10];
22812f7816d4SBarry Smith     alpha12 = x[16*i+11];
22822f7816d4SBarry Smith     alpha13 = x[16*i+12];
22832f7816d4SBarry Smith     alpha14 = x[16*i+13];
22842f7816d4SBarry Smith     alpha15 = x[16*i+14];
22852f7816d4SBarry Smith     alpha16 = x[16*i+15];
22862f7816d4SBarry Smith     while (n-->0) {
22872f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22892f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22902f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22912f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22922f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22932f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22942f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22952f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22962f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22972f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22982f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22992f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23002f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23012f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23022f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23032f7816d4SBarry Smith       idx++; v++;
23042f7816d4SBarry Smith     }
23052f7816d4SBarry Smith   }
23069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0*a->nz));
23079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
23089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
230966ed3db0SBarry Smith   PetscFunctionReturn(0);
231066ed3db0SBarry Smith }
231166ed3db0SBarry Smith 
2312ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2313ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2314ed1c418cSBarry Smith {
2315ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2316ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2317d2840e09SBarry Smith   const PetscScalar *x,*v;
2318d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2319ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2320d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2321d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2322ed1c418cSBarry Smith 
2323ed1c418cSBarry Smith   PetscFunctionBegin;
23249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
23259566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2326ed1c418cSBarry Smith   idx  = a->j;
2327ed1c418cSBarry Smith   v    = a->a;
2328ed1c418cSBarry Smith   ii   = a->i;
2329ed1c418cSBarry Smith 
2330ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2331ed1c418cSBarry Smith     jrow  = ii[i];
2332ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2333ed1c418cSBarry Smith     sum1  = 0.0;
2334ed1c418cSBarry Smith     sum2  = 0.0;
2335ed1c418cSBarry Smith     sum3  = 0.0;
2336ed1c418cSBarry Smith     sum4  = 0.0;
2337ed1c418cSBarry Smith     sum5  = 0.0;
2338ed1c418cSBarry Smith     sum6  = 0.0;
2339ed1c418cSBarry Smith     sum7  = 0.0;
2340ed1c418cSBarry Smith     sum8  = 0.0;
2341ed1c418cSBarry Smith     sum9  = 0.0;
2342ed1c418cSBarry Smith     sum10 = 0.0;
2343ed1c418cSBarry Smith     sum11 = 0.0;
2344ed1c418cSBarry Smith     sum12 = 0.0;
2345ed1c418cSBarry Smith     sum13 = 0.0;
2346ed1c418cSBarry Smith     sum14 = 0.0;
2347ed1c418cSBarry Smith     sum15 = 0.0;
2348ed1c418cSBarry Smith     sum16 = 0.0;
2349ed1c418cSBarry Smith     sum17 = 0.0;
2350ed1c418cSBarry Smith     sum18 = 0.0;
235126fbe8dcSKarl Rupp 
2352ed1c418cSBarry Smith     nonzerorow += (n>0);
2353ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2354ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2355ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2356ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2357ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2358ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2359ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2360ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2361ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2362ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2363ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2364ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2365ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2366ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2367ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2368ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2369ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2370ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2371ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2372ed1c418cSBarry Smith       jrow++;
2373ed1c418cSBarry Smith     }
2374ed1c418cSBarry Smith     y[18*i]    = sum1;
2375ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2376ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2377ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2378ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2379ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2380ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2381ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2382ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2383ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2384ed1c418cSBarry Smith     y[18*i+10] = sum11;
2385ed1c418cSBarry Smith     y[18*i+11] = sum12;
2386ed1c418cSBarry Smith     y[18*i+12] = sum13;
2387ed1c418cSBarry Smith     y[18*i+13] = sum14;
2388ed1c418cSBarry Smith     y[18*i+14] = sum15;
2389ed1c418cSBarry Smith     y[18*i+15] = sum16;
2390ed1c418cSBarry Smith     y[18*i+16] = sum17;
2391ed1c418cSBarry Smith     y[18*i+17] = sum18;
2392ed1c418cSBarry Smith   }
2393ed1c418cSBarry Smith 
23949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow));
23959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
23969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
2397ed1c418cSBarry Smith   PetscFunctionReturn(0);
2398ed1c418cSBarry Smith }
2399ed1c418cSBarry Smith 
2400ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2401ed1c418cSBarry Smith {
2402ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2403ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2404d2840e09SBarry Smith   const PetscScalar *x,*v;
2405d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2406ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2407d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2408d2840e09SBarry Smith   PetscInt          n,i;
2409ed1c418cSBarry Smith 
2410ed1c418cSBarry Smith   PetscFunctionBegin;
24119566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
24129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
24139566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
2414ed1c418cSBarry Smith 
2415ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2416ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2417ed1c418cSBarry Smith     v       = a->a + a->i[i];
2418ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2419ed1c418cSBarry Smith     alpha1  = x[18*i];
2420ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2421ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2422ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2423ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2424ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2425ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2426ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2427ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2428ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2429ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2430ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2431ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2432ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2433ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2434ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2435ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2436ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2437ed1c418cSBarry Smith     while (n-->0) {
2438ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2439ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2440ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2441ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2442ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2443ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2444ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2445ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2446ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2447ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2448ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2449ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2450ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2451ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2452ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2453ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2454ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2455ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2456ed1c418cSBarry Smith       idx++; v++;
2457ed1c418cSBarry Smith     }
2458ed1c418cSBarry Smith   }
24599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
24609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
24619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
2462ed1c418cSBarry Smith   PetscFunctionReturn(0);
2463ed1c418cSBarry Smith }
2464ed1c418cSBarry Smith 
2465ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2466ed1c418cSBarry Smith {
2467ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2468ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2469d2840e09SBarry Smith   const PetscScalar *x,*v;
2470d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2471ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2472d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2473ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2474ed1c418cSBarry Smith 
2475ed1c418cSBarry Smith   PetscFunctionBegin;
24769566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
24779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
24789566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
2479ed1c418cSBarry Smith   idx  = a->j;
2480ed1c418cSBarry Smith   v    = a->a;
2481ed1c418cSBarry Smith   ii   = a->i;
2482ed1c418cSBarry Smith 
2483ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2484ed1c418cSBarry Smith     jrow  = ii[i];
2485ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2486ed1c418cSBarry Smith     sum1  = 0.0;
2487ed1c418cSBarry Smith     sum2  = 0.0;
2488ed1c418cSBarry Smith     sum3  = 0.0;
2489ed1c418cSBarry Smith     sum4  = 0.0;
2490ed1c418cSBarry Smith     sum5  = 0.0;
2491ed1c418cSBarry Smith     sum6  = 0.0;
2492ed1c418cSBarry Smith     sum7  = 0.0;
2493ed1c418cSBarry Smith     sum8  = 0.0;
2494ed1c418cSBarry Smith     sum9  = 0.0;
2495ed1c418cSBarry Smith     sum10 = 0.0;
2496ed1c418cSBarry Smith     sum11 = 0.0;
2497ed1c418cSBarry Smith     sum12 = 0.0;
2498ed1c418cSBarry Smith     sum13 = 0.0;
2499ed1c418cSBarry Smith     sum14 = 0.0;
2500ed1c418cSBarry Smith     sum15 = 0.0;
2501ed1c418cSBarry Smith     sum16 = 0.0;
2502ed1c418cSBarry Smith     sum17 = 0.0;
2503ed1c418cSBarry Smith     sum18 = 0.0;
2504ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2505ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2506ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2507ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2508ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2509ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2510ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2511ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2512ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2513ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2514ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2515ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2516ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2517ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2518ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2519ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2520ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2521ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2522ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2523ed1c418cSBarry Smith       jrow++;
2524ed1c418cSBarry Smith     }
2525ed1c418cSBarry Smith     y[18*i]    += sum1;
2526ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2527ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2528ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2529ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2530ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2531ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2532ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2533ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2534ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2535ed1c418cSBarry Smith     y[18*i+10] += sum11;
2536ed1c418cSBarry Smith     y[18*i+11] += sum12;
2537ed1c418cSBarry Smith     y[18*i+12] += sum13;
2538ed1c418cSBarry Smith     y[18*i+13] += sum14;
2539ed1c418cSBarry Smith     y[18*i+14] += sum15;
2540ed1c418cSBarry Smith     y[18*i+15] += sum16;
2541ed1c418cSBarry Smith     y[18*i+16] += sum17;
2542ed1c418cSBarry Smith     y[18*i+17] += sum18;
2543ed1c418cSBarry Smith   }
2544ed1c418cSBarry Smith 
25459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
25469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
25479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2548ed1c418cSBarry Smith   PetscFunctionReturn(0);
2549ed1c418cSBarry Smith }
2550ed1c418cSBarry Smith 
2551ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2552ed1c418cSBarry Smith {
2553ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2554ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2555d2840e09SBarry Smith   const PetscScalar *x,*v;
2556d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2557ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2558d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2559d2840e09SBarry Smith   PetscInt          n,i;
2560ed1c418cSBarry Smith 
2561ed1c418cSBarry Smith   PetscFunctionBegin;
25629566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
25639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
25649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
2565ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2566ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2567ed1c418cSBarry Smith     v       = a->a + a->i[i];
2568ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2569ed1c418cSBarry Smith     alpha1  = x[18*i];
2570ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2571ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2572ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2573ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2574ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2575ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2576ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2577ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2578ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2579ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2580ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2581ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2582ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2583ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2584ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2585ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2586ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2587ed1c418cSBarry Smith     while (n-->0) {
2588ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2593ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2594ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2595ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2596ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2597ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2598ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2599ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2600ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2601ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2602ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2603ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2604ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2605ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2606ed1c418cSBarry Smith       idx++; v++;
2607ed1c418cSBarry Smith     }
2608ed1c418cSBarry Smith   }
26099566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0*a->nz));
26109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
2612ed1c418cSBarry Smith   PetscFunctionReturn(0);
2613ed1c418cSBarry Smith }
2614ed1c418cSBarry Smith 
26156861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26166861f193SBarry Smith {
26176861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26186861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26196861f193SBarry Smith   const PetscScalar *x,*v;
26206861f193SBarry Smith   PetscScalar       *y,*sums;
26216861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26226861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26236861f193SBarry Smith 
26246861f193SBarry Smith   PetscFunctionBegin;
26259566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26269566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
26279566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
26286861f193SBarry Smith   idx  = a->j;
26296861f193SBarry Smith   v    = a->a;
26306861f193SBarry Smith   ii   = a->i;
26316861f193SBarry Smith 
26326861f193SBarry Smith   for (i=0; i<m; i++) {
26336861f193SBarry Smith     jrow = ii[i];
26346861f193SBarry Smith     n    = ii[i+1] - jrow;
26356861f193SBarry Smith     sums = y + dof*i;
26366861f193SBarry Smith     for (j=0; j<n; j++) {
26376861f193SBarry Smith       for (k=0; k<dof; k++) {
26386861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26396861f193SBarry Smith       }
26406861f193SBarry Smith       jrow++;
26416861f193SBarry Smith     }
26426861f193SBarry Smith   }
26436861f193SBarry Smith 
26449566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
26459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
26476861f193SBarry Smith   PetscFunctionReturn(0);
26486861f193SBarry Smith }
26496861f193SBarry Smith 
26506861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
26516861f193SBarry Smith {
26526861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26536861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26546861f193SBarry Smith   const PetscScalar *x,*v;
26556861f193SBarry Smith   PetscScalar       *y,*sums;
26566861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26576861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26586861f193SBarry Smith 
26596861f193SBarry Smith   PetscFunctionBegin;
26609566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
26619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26629566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
26636861f193SBarry Smith   idx  = a->j;
26646861f193SBarry Smith   v    = a->a;
26656861f193SBarry Smith   ii   = a->i;
26666861f193SBarry Smith 
26676861f193SBarry Smith   for (i=0; i<m; i++) {
26686861f193SBarry Smith     jrow = ii[i];
26696861f193SBarry Smith     n    = ii[i+1] - jrow;
26706861f193SBarry Smith     sums = y + dof*i;
26716861f193SBarry Smith     for (j=0; j<n; j++) {
26726861f193SBarry Smith       for (k=0; k<dof; k++) {
26736861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26746861f193SBarry Smith       }
26756861f193SBarry Smith       jrow++;
26766861f193SBarry Smith     }
26776861f193SBarry Smith   }
26786861f193SBarry Smith 
26799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
26809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
26819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
26826861f193SBarry Smith   PetscFunctionReturn(0);
26836861f193SBarry Smith }
26846861f193SBarry Smith 
26856861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26866861f193SBarry Smith {
26876861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26886861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26896861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
26906861f193SBarry Smith   PetscScalar       *y;
26916861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
26926861f193SBarry Smith   PetscInt          n,i,k;
26936861f193SBarry Smith 
26946861f193SBarry Smith   PetscFunctionBegin;
26959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
26969566063dSJacob Faibussowitsch   PetscCall(VecSet(yy,0.0));
26979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
26986861f193SBarry Smith   for (i=0; i<m; i++) {
26996861f193SBarry Smith     idx   = a->j + a->i[i];
27006861f193SBarry Smith     v     = a->a + a->i[i];
27016861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27026861f193SBarry Smith     alpha = x + dof*i;
27036861f193SBarry Smith     while (n-->0) {
27046861f193SBarry Smith       for (k=0; k<dof; k++) {
27056861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27066861f193SBarry Smith       }
270783ce7366SBarry Smith       idx++; v++;
27086861f193SBarry Smith     }
27096861f193SBarry Smith   }
27109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
27119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
27129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
27136861f193SBarry Smith   PetscFunctionReturn(0);
27146861f193SBarry Smith }
27156861f193SBarry Smith 
27166861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27176861f193SBarry Smith {
27186861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27196861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27206861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27216861f193SBarry Smith   PetscScalar       *y;
27226861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27236861f193SBarry Smith   PetscInt          n,i,k;
27246861f193SBarry Smith 
27256861f193SBarry Smith   PetscFunctionBegin;
27269566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy,zz));
27279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
27289566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz,&y));
27296861f193SBarry Smith   for (i=0; i<m; i++) {
27306861f193SBarry Smith     idx   = a->j + a->i[i];
27316861f193SBarry Smith     v     = a->a + a->i[i];
27326861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27336861f193SBarry Smith     alpha = x + dof*i;
27346861f193SBarry Smith     while (n-->0) {
27356861f193SBarry Smith       for (k=0; k<dof; k++) {
27366861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27376861f193SBarry Smith       }
273883ce7366SBarry Smith       idx++; v++;
27396861f193SBarry Smith     }
27406861f193SBarry Smith   }
27419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*dof*a->nz));
27429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
27439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz,&y));
27446861f193SBarry Smith   PetscFunctionReturn(0);
27456861f193SBarry Smith }
27466861f193SBarry Smith 
2747f4a53059SBarry Smith /*===================================================================================*/
2748dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2749f4a53059SBarry Smith {
27504cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2751f4a53059SBarry Smith 
2752b24ad042SBarry Smith   PetscFunctionBegin;
2753f4a53059SBarry Smith   /* start the scatter */
27549566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27559566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy));
27569566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27579566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy));
2758f4a53059SBarry Smith   PetscFunctionReturn(0);
2759f4a53059SBarry Smith }
2760f4a53059SBarry Smith 
2761dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
27624cb9d9b8SBarry Smith {
27634cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2764b24ad042SBarry Smith 
27654cb9d9b8SBarry Smith   PetscFunctionBegin;
27669566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
27679566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy));
27689566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
27699566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
27704cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27714cb9d9b8SBarry Smith }
27724cb9d9b8SBarry Smith 
2773dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
27744cb9d9b8SBarry Smith {
27754cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
27764cb9d9b8SBarry Smith 
2777b24ad042SBarry Smith   PetscFunctionBegin;
27784cb9d9b8SBarry Smith   /* start the scatter */
27799566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27809566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz));
27819566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
27829566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
27834cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27844cb9d9b8SBarry Smith }
27854cb9d9b8SBarry Smith 
2786dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
27874cb9d9b8SBarry Smith {
27884cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2789b24ad042SBarry Smith 
27904cb9d9b8SBarry Smith   PetscFunctionBegin;
27919566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
27929566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz));
27939566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
27949566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
27954cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27964cb9d9b8SBarry Smith }
27974cb9d9b8SBarry Smith 
279895ddefa2SHong Zhang /* ----------------------------------------------------------------*/
27994222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
28004222ddf1SHong Zhang {
28014222ddf1SHong Zhang   Mat_Product    *product = C->product;
28024222ddf1SHong Zhang 
28034222ddf1SHong Zhang   PetscFunctionBegin;
28044222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
28054222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
280698921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
28074222ddf1SHong Zhang   PetscFunctionReturn(0);
28084222ddf1SHong Zhang }
28094222ddf1SHong Zhang 
28104222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
28114222ddf1SHong Zhang {
28124222ddf1SHong Zhang   PetscErrorCode ierr;
28134222ddf1SHong Zhang   Mat_Product    *product = C->product;
28144222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
28154222ddf1SHong Zhang   Mat            A=product->A,P=product->B;
28164222ddf1SHong Zhang   PetscInt       alg=1; /* set default algorithm */
28174222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
28184222ddf1SHong Zhang   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
28194222ddf1SHong Zhang   PetscInt       nalg=4;
28204222ddf1SHong Zhang #else
28214222ddf1SHong Zhang   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
28224222ddf1SHong Zhang   PetscInt       nalg=5;
28234222ddf1SHong Zhang #endif
28244222ddf1SHong Zhang 
28254222ddf1SHong Zhang   PetscFunctionBegin;
2826*08401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
28274222ddf1SHong Zhang 
28284222ddf1SHong Zhang   /* PtAP */
28294222ddf1SHong Zhang   /* Check matrix local sizes */
28302c71b3e2SJacob Faibussowitsch   PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
28312c71b3e2SJacob Faibussowitsch   PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
28324222ddf1SHong Zhang 
28334222ddf1SHong Zhang   /* Set the default algorithm */
28349566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
28354222ddf1SHong Zhang   if (flg) {
28369566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
28374222ddf1SHong Zhang   }
28384222ddf1SHong Zhang 
28394222ddf1SHong Zhang   /* Get runtime option */
28409566063dSJacob Faibussowitsch   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
28419566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg));
28424222ddf1SHong Zhang   if (flg) {
28439566063dSJacob Faibussowitsch     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
28444222ddf1SHong Zhang   }
28459566063dSJacob Faibussowitsch   ierr = PetscOptionsEnd();PetscCall(ierr);
28464222ddf1SHong Zhang 
28479566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg));
28484222ddf1SHong Zhang   if (flg) {
28494222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28504222ddf1SHong Zhang     PetscFunctionReturn(0);
28514222ddf1SHong Zhang   }
28524222ddf1SHong Zhang 
28539566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg));
28544222ddf1SHong Zhang   if (flg) {
28554222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28564222ddf1SHong Zhang     PetscFunctionReturn(0);
28574222ddf1SHong Zhang   }
28584222ddf1SHong Zhang 
28594222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
28609566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
28619566063dSJacob Faibussowitsch   PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P));
28629566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
28634222ddf1SHong Zhang   PetscFunctionReturn(0);
28644222ddf1SHong Zhang }
28654222ddf1SHong Zhang 
28664222ddf1SHong Zhang /* ----------------------------------------------------------------*/
28674222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
28687ba1a0bfSKris Buschelman {
28690298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
28707ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
28717ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
28727ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2873d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
28747ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2875d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2876d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
28777ba1a0bfSKris Buschelman   MatScalar          *ca;
2878d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
28797ba1a0bfSKris Buschelman 
28807ba1a0bfSKris Buschelman   PetscFunctionBegin;
28817ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28829566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
28837ba1a0bfSKris Buschelman 
28847ba1a0bfSKris Buschelman   cn = pn*ppdof;
28857ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28867ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn+1,&ci));
28887ba1a0bfSKris Buschelman   ci[0] = 0;
28897ba1a0bfSKris Buschelman 
28907ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow));
28929566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow,an));
28939566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow,cn));
28947ba1a0bfSKris Buschelman 
28957ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28967ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28977ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28989566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space));
28997ba1a0bfSKris Buschelman   current_space = free_space;
29007ba1a0bfSKris Buschelman 
29017ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29027ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29037ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29047ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29057ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29067ba1a0bfSKris Buschelman       ptanzi = 0;
29077ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29087ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29097ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29107ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29117ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29127ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29137ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29147ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29157ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29167ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29177ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29187ba1a0bfSKris Buschelman           }
29197ba1a0bfSKris Buschelman         }
29207ba1a0bfSKris Buschelman       }
29217ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29227ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29237ba1a0bfSKris Buschelman       cnzi = 0;
29247ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29257ba1a0bfSKris Buschelman         /* Get offset within block of P */
29267ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29277ba1a0bfSKris Buschelman         /* Get block row of P */
29287ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29297ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29307ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29317ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29327ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29337ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29347ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29357ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29367ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29377ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
29387ba1a0bfSKris Buschelman           }
29397ba1a0bfSKris Buschelman         }
29407ba1a0bfSKris Buschelman       }
29417ba1a0bfSKris Buschelman 
29427ba1a0bfSKris Buschelman       /* sort sparserow */
29439566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi,sparserow));
29447ba1a0bfSKris Buschelman 
29457ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29467ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
29477ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
29489566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
29497ba1a0bfSKris Buschelman       }
29507ba1a0bfSKris Buschelman 
29517ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29529566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi));
295326fbe8dcSKarl Rupp 
29547ba1a0bfSKris Buschelman       current_space->array           += cnzi;
29557ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
29567ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29577ba1a0bfSKris Buschelman 
295826fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
295926fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
296026fbe8dcSKarl Rupp 
29617ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29627ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29637ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
29647ba1a0bfSKris Buschelman     }
29657ba1a0bfSKris Buschelman   }
29667ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29677ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29687ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn]+1,&cj));
29709566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
29719566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow));
29727ba1a0bfSKris Buschelman 
29737ba1a0bfSKris Buschelman   /* Allocate space for ca */
29749566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn]+1,&ca));
29757ba1a0bfSKris Buschelman 
29767ba1a0bfSKris Buschelman   /* put together the new matrix */
29779566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C));
29789566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C,pp->dof));
29797ba1a0bfSKris Buschelman 
29807ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29817ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29824222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
2983e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2984e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29857ba1a0bfSKris Buschelman   c->nonew   = 0;
298626fbe8dcSKarl Rupp 
29874222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29884222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29897ba1a0bfSKris Buschelman 
29907ba1a0bfSKris Buschelman   /* Clean up. */
29919566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
29927ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29937ba1a0bfSKris Buschelman }
29947ba1a0bfSKris Buschelman 
29957ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
29967ba1a0bfSKris Buschelman {
29977ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29987ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
29997ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
30007ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30017ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30027ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3003a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3004d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3005d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3006d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3007a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3008d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30097ba1a0bfSKris Buschelman 
30107ba1a0bfSKris Buschelman   PetscFunctionBegin;
30117ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30129566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense));
30137ba1a0bfSKris Buschelman 
30147ba1a0bfSKris Buschelman   /* Clear old values in C */
30159566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca,ci[cm]));
30167ba1a0bfSKris Buschelman 
30177ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30187ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30197ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30207ba1a0bfSKris Buschelman     apnzj = 0;
30217ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30227ba1a0bfSKris Buschelman       /* Get offset within block of P */
30237ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30247ba1a0bfSKris Buschelman       /* Get block row of P */
30257ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30267ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30277ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30287ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30297ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30307ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30317ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30327ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30337ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30347ba1a0bfSKris Buschelman         }
30357ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30367ba1a0bfSKris Buschelman       }
30379566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0*pnzj));
30387ba1a0bfSKris Buschelman       aa++;
30397ba1a0bfSKris Buschelman     }
30407ba1a0bfSKris Buschelman 
30417ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30427ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30439566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj,apj));
30447ba1a0bfSKris Buschelman 
30457ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30467ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
30477ba1a0bfSKris Buschelman     pshift  = i%ppdof;
30487ba1a0bfSKris Buschelman     poffset = pi[prow];
30497ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
30507ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30517ba1a0bfSKris Buschelman     pJ = pj+poffset;
30527ba1a0bfSKris Buschelman     pA = pa+poffset;
30537ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
30547ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
30557ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30567ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30577ba1a0bfSKris Buschelman       pJ++;
30587ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30597ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
306026fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
30617ba1a0bfSKris Buschelman       }
30629566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0*apnzj));
30637ba1a0bfSKris Buschelman       pA++;
30647ba1a0bfSKris Buschelman     }
30657ba1a0bfSKris Buschelman 
30667ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30677ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
30687ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30697ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30707ba1a0bfSKris Buschelman     }
30717ba1a0bfSKris Buschelman   }
30727ba1a0bfSKris Buschelman 
30737ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30749566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
30759566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
30769566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa,apj,apjdense));
307795ddefa2SHong Zhang   PetscFunctionReturn(0);
307895ddefa2SHong Zhang }
30797ba1a0bfSKris Buschelman 
30804222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
30812121bac1SHong Zhang {
30824222ddf1SHong Zhang   Mat_Product         *product = C->product;
30834222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
30842121bac1SHong Zhang 
30852121bac1SHong Zhang   PetscFunctionBegin;
30869566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C));
30872121bac1SHong Zhang   PetscFunctionReturn(0);
30882121bac1SHong Zhang }
30892121bac1SHong Zhang 
3090f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
309195ddefa2SHong Zhang {
309295ddefa2SHong Zhang   PetscFunctionBegin;
30933e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
309495ddefa2SHong Zhang }
309595ddefa2SHong Zhang 
3096f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
309795ddefa2SHong Zhang {
309895ddefa2SHong Zhang   PetscFunctionBegin;
30993e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31007ba1a0bfSKris Buschelman }
31015c65b9ecSFande Kong 
3102bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3103bc8e477aSFande Kong 
3104bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3105bc8e477aSFande Kong {
3106bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3107bc8e477aSFande Kong 
3108bc8e477aSFande Kong   PetscFunctionBegin;
310934bcad68SFande Kong 
31109566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C));
3111bc8e477aSFande Kong   PetscFunctionReturn(0);
3112bc8e477aSFande Kong }
3113bc8e477aSFande Kong 
31144222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3115bc8e477aSFande Kong 
31164222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3117bc8e477aSFande Kong {
3118bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3119bc8e477aSFande Kong 
3120bc8e477aSFande Kong   PetscFunctionBegin;
31219566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C));
31224222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3123bc8e477aSFande Kong   PetscFunctionReturn(0);
3124bc8e477aSFande Kong }
3125bc8e477aSFande Kong 
3126bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3127bc8e477aSFande Kong 
3128bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3129bc8e477aSFande Kong {
3130bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3131bc8e477aSFande Kong 
3132bc8e477aSFande Kong   PetscFunctionBegin;
313334bcad68SFande Kong 
31349566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C));
3135bc8e477aSFande Kong   PetscFunctionReturn(0);
3136bc8e477aSFande Kong }
3137bc8e477aSFande Kong 
31384222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3139bc8e477aSFande Kong 
31404222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3141bc8e477aSFande Kong {
3142bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3143bc8e477aSFande Kong 
3144bc8e477aSFande Kong   PetscFunctionBegin;
314534bcad68SFande Kong 
31469566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C));
31474222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3148bc8e477aSFande Kong   PetscFunctionReturn(0);
3149bc8e477aSFande Kong }
3150bc8e477aSFande Kong 
31514222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3152bc8e477aSFande Kong {
31534222ddf1SHong Zhang   Mat_Product         *product = C->product;
31544222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
31554222ddf1SHong Zhang   PetscBool           flg;
3156bc8e477aSFande Kong 
3157bc8e477aSFande Kong   PetscFunctionBegin;
31589566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"allatonce",&flg));
31594222ddf1SHong Zhang   if (flg) {
31609566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C));
31614222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31624222ddf1SHong Zhang     PetscFunctionReturn(0);
3163bc8e477aSFande Kong   }
316434bcad68SFande Kong 
31659566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg));
31664222ddf1SHong Zhang   if (flg) {
31679566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C));
31684222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31694222ddf1SHong Zhang     PetscFunctionReturn(0);
31704222ddf1SHong Zhang   }
317134bcad68SFande Kong 
31724222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3173bc8e477aSFande Kong }
3174bc8e477aSFande Kong 
3175cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31769c4fc4c7SBarry Smith {
31779c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3178ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
31799c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
31800fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3181ba8c8a56SBarry Smith   PetscInt       *cols;
3182ba8c8a56SBarry Smith   PetscScalar    *vals;
31839c4fc4c7SBarry Smith 
31849c4fc4c7SBarry Smith   PetscFunctionBegin;
31859566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a,&m,&n));
31869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof*m,&ilen));
31879c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31889c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
318926fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
31909c4fc4c7SBarry Smith   }
31919566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
31929566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n));
31939566063dSJacob Faibussowitsch   PetscCall(MatSetType(B,newtype));
31949566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B,0,ilen));
31959566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax,&icols));
31979c4fc4c7SBarry Smith   ii   = 0;
31989c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31999566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals));
32000fd73130SBarry Smith     for (j=0; j<dof; j++) {
320126fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
32029566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
32039c4fc4c7SBarry Smith       ii++;
32049c4fc4c7SBarry Smith     }
32059566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals));
32069c4fc4c7SBarry Smith   }
32079566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
32089566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
32099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3210ceb03754SKris Buschelman 
3211511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32129566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A,&B));
3213c3d102feSKris Buschelman   } else {
3214ceb03754SKris Buschelman     *newmat = B;
3215c3d102feSKris Buschelman   }
32169c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32179c4fc4c7SBarry Smith }
32189c4fc4c7SBarry Smith 
3219c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3220be1d678aSKris Buschelman 
3221cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32220fd73130SBarry Smith {
32230fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3224ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32250fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32260fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
32270fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
32280fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
32290298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
32300298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
32310fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
32320fd73130SBarry Smith   PetscScalar    *vals,*ovals;
32330fd73130SBarry Smith 
32340fd73130SBarry Smith   PetscFunctionBegin;
32359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz));
3236d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32370fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32380fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32390fd73130SBarry Smith     for (j=0; j<dof; j++) {
32400fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32410fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32420fd73130SBarry Smith     }
32430fd73130SBarry Smith   }
32449566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
32459566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
32469566063dSJacob Faibussowitsch   PetscCall(MatSetType(B,newtype));
32479566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz));
32489566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B,dof));
32499566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz,onz));
32500fd73130SBarry Smith 
32519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols));
3252d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3253d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
32540fd73130SBarry Smith   garray = mpiaij->garray;
32550fd73130SBarry Smith 
32560fd73130SBarry Smith   ii = rstart;
3257d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32589566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
32599566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
32600fd73130SBarry Smith     for (j=0; j<dof; j++) {
32610fd73130SBarry Smith       for (k=0; k<ncols; k++) {
32620fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
32630fd73130SBarry Smith       }
32640fd73130SBarry Smith       for (k=0; k<oncols; k++) {
32650fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
32660fd73130SBarry Smith       }
32679566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
32689566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES));
32690fd73130SBarry Smith       ii++;
32700fd73130SBarry Smith     }
32719566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
32729566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
32730fd73130SBarry Smith   }
32749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols,oicols));
32750fd73130SBarry Smith 
32769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
32779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3278ceb03754SKris Buschelman 
3279511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32807adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32817adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
328226fbe8dcSKarl Rupp 
32839566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A,&B));
328426fbe8dcSKarl Rupp 
32857adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3286c3d102feSKris Buschelman   } else {
3287ceb03754SKris Buschelman     *newmat = B;
3288c3d102feSKris Buschelman   }
32890fd73130SBarry Smith   PetscFunctionReturn(0);
32900fd73130SBarry Smith }
32910fd73130SBarry Smith 
32927dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
32939e516c8fSBarry Smith {
32949e516c8fSBarry Smith   Mat            A;
32959e516c8fSBarry Smith 
32969e516c8fSBarry Smith   PetscFunctionBegin;
32979566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
32989566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
32999566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
33009e516c8fSBarry Smith   PetscFunctionReturn(0);
33019e516c8fSBarry Smith }
33020fd73130SBarry Smith 
3303ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3304ec626240SStefano Zampini {
3305ec626240SStefano Zampini   Mat            A;
3306ec626240SStefano Zampini 
3307ec626240SStefano Zampini   PetscFunctionBegin;
33089566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
33099566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat));
33109566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3311ec626240SStefano Zampini   PetscFunctionReturn(0);
3312ec626240SStefano Zampini }
3313ec626240SStefano Zampini 
3314bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3315480dffcfSJed Brown /*@
33160bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33170bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33180bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33190bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33200bad9183SKris Buschelman 
3321ff585edeSJed Brown   Collective
3322ff585edeSJed Brown 
3323ff585edeSJed Brown   Input Parameters:
3324ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3325ff585edeSJed Brown - dof - the block size (number of components per node)
3326ff585edeSJed Brown 
3327ff585edeSJed Brown   Output Parameter:
3328ff585edeSJed Brown . maij - the new MAIJ matrix
3329ff585edeSJed Brown 
33300bad9183SKris Buschelman   Operations provided:
333167b8a455SSatish Balay .vb
333267b8a455SSatish Balay     MatMult
333367b8a455SSatish Balay     MatMultTranspose
333467b8a455SSatish Balay     MatMultAdd
333567b8a455SSatish Balay     MatMultTransposeAdd
333667b8a455SSatish Balay     MatView
333767b8a455SSatish Balay .ve
33380bad9183SKris Buschelman 
33390bad9183SKris Buschelman   Level: advanced
33400bad9183SKris Buschelman 
3341b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3342ff585edeSJed Brown @*/
33437087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
334482b1193eSBarry Smith {
3345b24ad042SBarry Smith   PetscMPIInt    size;
3346b24ad042SBarry Smith   PetscInt       n;
334782b1193eSBarry Smith   Mat            B;
3348fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3349fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3350fdc842d1SBarry Smith   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3351fdc842d1SBarry Smith #endif
335282b1193eSBarry Smith 
335382b1193eSBarry Smith   PetscFunctionBegin;
3354fdc842d1SBarry Smith   dof  = PetscAbs(dof);
33559566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3356d72c5c08SBarry Smith 
335726fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
335826fbe8dcSKarl Rupp   else {
33599566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3360c248e2fdSStefano Zampini     /* propagate vec type */
33619566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B,A->defaultvectype));
33629566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N));
33639566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap,dof));
33649566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap,dof));
33659566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
33669566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
336726fbe8dcSKarl Rupp 
3368cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3369d72c5c08SBarry Smith 
33709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3371f4a53059SBarry Smith     if (size == 1) {
3372feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3373feb9fe23SJed Brown 
33749566063dSJacob Faibussowitsch       PetscCall(MatSetType(B,MATSEQMAIJ));
337526fbe8dcSKarl Rupp 
33760298fd71SBarry Smith       B->ops->setup   = NULL;
33774cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33780fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
33794222ddf1SHong Zhang 
3380feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3381b9b97703SBarry Smith       b->dof          = dof;
33824cb9d9b8SBarry Smith       b->AIJ          = A;
338326fbe8dcSKarl Rupp 
3384d72c5c08SBarry Smith       if (dof == 2) {
3385cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3386cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3387cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3388cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3389bcc973b7SBarry Smith       } else if (dof == 3) {
3390bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3391bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3392bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3393bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3394bcc973b7SBarry Smith       } else if (dof == 4) {
3395bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3396bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3397bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3398bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3399f9fae5adSBarry Smith       } else if (dof == 5) {
3400f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3401f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3402f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3403f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34046cd98798SBarry Smith       } else if (dof == 6) {
34056cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34066cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34076cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34086cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3409ed8eea03SBarry Smith       } else if (dof == 7) {
3410ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3411ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3412ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3413ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
341466ed3db0SBarry Smith       } else if (dof == 8) {
341566ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
341666ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
341766ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
341866ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34192b692628SMatthew Knepley       } else if (dof == 9) {
34202b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34212b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34222b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34232b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3424b9cda22cSBarry Smith       } else if (dof == 10) {
3425b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3426b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3427b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3428b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3429dbdd7285SBarry Smith       } else if (dof == 11) {
3430dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3431dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3432dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3433dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34342f7816d4SBarry Smith       } else if (dof == 16) {
34352f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34362f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34372f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34382f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3439ed1c418cSBarry Smith       } else if (dof == 18) {
3440ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3441ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3442ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3443ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
344482b1193eSBarry Smith       } else {
34456861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34466861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34476861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34486861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
344982b1193eSBarry Smith       }
3450fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34519566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ));
3452fdc842d1SBarry Smith #endif
34539566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ));
34549566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3455f4a53059SBarry Smith     } else {
3456f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3457feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3458f4a53059SBarry Smith       IS          from,to;
3459f4a53059SBarry Smith       Vec         gvec;
3460f4a53059SBarry Smith 
34619566063dSJacob Faibussowitsch       PetscCall(MatSetType(B,MATMPIMAIJ));
346226fbe8dcSKarl Rupp 
34630298fd71SBarry Smith       B->ops->setup            = NULL;
3464d72c5c08SBarry Smith       B->ops->destroy          = MatDestroy_MPIMAIJ;
34650fd73130SBarry Smith       B->ops->view             = MatView_MPIMAIJ;
346626fbe8dcSKarl Rupp 
3467b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3468b9b97703SBarry Smith       b->dof = dof;
3469b9b97703SBarry Smith       b->A   = A;
347026fbe8dcSKarl Rupp 
34719566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ));
34729566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ));
34734cb9d9b8SBarry Smith 
34749566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec,&n));
34759566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF,&b->w));
34769566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w,n*dof,n*dof));
34779566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w,dof));
34789566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w,VECSEQ));
3479f4a53059SBarry Smith 
3480f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
34819566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
34829566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to));
3483f4a53059SBarry Smith 
3484f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
34859566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec));
3486f4a53059SBarry Smith 
3487f4a53059SBarry Smith       /* generate the scatter context */
34889566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx));
3489f4a53059SBarry Smith 
34909566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34919566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34929566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3493f4a53059SBarry Smith 
3494f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34954cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34964cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34974cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
349826fbe8dcSKarl Rupp 
3499fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
35009566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ));
3501fdc842d1SBarry Smith #endif
35029566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ));
35039566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3504f4a53059SBarry Smith     }
35057dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3506ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
35079566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3508fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3509fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3510fdc842d1SBarry Smith     {
3511fdc842d1SBarry Smith       PetscBool flg;
3512fdc842d1SBarry Smith       if (convert) {
35139566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,""));
3514fdc842d1SBarry Smith         if (flg) {
35159566063dSJacob Faibussowitsch           PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B));
3516fdc842d1SBarry Smith         }
3517fdc842d1SBarry Smith       }
3518fdc842d1SBarry Smith     }
3519fdc842d1SBarry Smith #endif
3520cd3bbe55SBarry Smith     *maij = B;
3521d72c5c08SBarry Smith   }
352282b1193eSBarry Smith   PetscFunctionReturn(0);
352382b1193eSBarry Smith }
3524