xref: /petsc/src/mat/impls/maij/maij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1be1d678aSKris Buschelman 
2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
482b1193eSBarry Smith 
5b350b9acSSatish Balay /*@
611a5261eSBarry Smith    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7ff585edeSJed Brown 
811a5261eSBarry Smith    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9ff585edeSJed Brown 
10ff585edeSJed Brown    Input Parameter:
1111a5261eSBarry Smith .  A - the `MATMAIJ` matrix
12ff585edeSJed Brown 
13ff585edeSJed Brown    Output Parameter:
1411a5261eSBarry Smith .  B - the `MATAIJ` matrix
15ff585edeSJed Brown 
16ff585edeSJed Brown    Level: advanced
17ff585edeSJed Brown 
1811a5261eSBarry Smith    Note:
1911a5261eSBarry Smith     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20ff585edeSJed Brown 
2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22ff585edeSJed Brown @*/
239371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) {
24ace3abfcSBarry Smith   PetscBool ismpimaij, isseqmaij;
25b9b97703SBarry Smith 
26b9b97703SBarry Smith   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
289566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29b9b97703SBarry Smith   if (ismpimaij) {
30b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31b9b97703SBarry Smith 
32b9b97703SBarry Smith     *B = b->A;
33b9b97703SBarry Smith   } else if (isseqmaij) {
34b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35b9b97703SBarry Smith 
36b9b97703SBarry Smith     *B = b->AIJ;
37b9b97703SBarry Smith   } else {
38b9b97703SBarry Smith     *B = A;
39b9b97703SBarry Smith   }
40b9b97703SBarry Smith   PetscFunctionReturn(0);
41b9b97703SBarry Smith }
42b9b97703SBarry Smith 
43480dffcfSJed Brown /*@
4411a5261eSBarry Smith    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45ff585edeSJed Brown 
463f9fe445SBarry Smith    Logically Collective
47ff585edeSJed Brown 
48d8d19677SJose E. Roman    Input Parameters:
4911a5261eSBarry Smith +  A - the `MATMAIJ` matrix
50ff585edeSJed Brown -  dof - the block size for the new matrix
51ff585edeSJed Brown 
52ff585edeSJed Brown    Output Parameter:
5311a5261eSBarry Smith .  B - the new `MATMAIJ` matrix
54ff585edeSJed Brown 
55ff585edeSJed Brown    Level: advanced
56ff585edeSJed Brown 
5711a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()`
58ff585edeSJed Brown @*/
599371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) {
600298fd71SBarry Smith   Mat Aij = NULL;
61b9b97703SBarry Smith 
62b9b97703SBarry Smith   PetscFunctionBegin;
63c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A, dof, 2);
649566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A, &Aij));
659566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij, dof, B));
66b9b97703SBarry Smith   PetscFunctionReturn(0);
67b9b97703SBarry Smith }
68b9b97703SBarry Smith 
699371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) {
704cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
7182b1193eSBarry Smith 
7282b1193eSBarry Smith   PetscFunctionBegin;
739566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
749566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
784cb9d9b8SBarry Smith   PetscFunctionReturn(0);
794cb9d9b8SBarry Smith }
804cb9d9b8SBarry Smith 
819371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) {
82356c569eSBarry Smith   PetscFunctionBegin;
83ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
84356c569eSBarry Smith }
85356c569eSBarry Smith 
869371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) {
870fd73130SBarry Smith   Mat B;
880fd73130SBarry Smith 
890fd73130SBarry Smith   PetscFunctionBegin;
909566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
919566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
929566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
930fd73130SBarry Smith   PetscFunctionReturn(0);
940fd73130SBarry Smith }
950fd73130SBarry Smith 
969371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) {
970fd73130SBarry Smith   Mat B;
980fd73130SBarry Smith 
990fd73130SBarry Smith   PetscFunctionBegin;
1009566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1019566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1030fd73130SBarry Smith   PetscFunctionReturn(0);
1040fd73130SBarry Smith }
1050fd73130SBarry Smith 
1069371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) {
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1084cb9d9b8SBarry Smith 
1094cb9d9b8SBarry Smith   PetscFunctionBegin;
1109566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1119566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1129566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1139566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1149566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1159566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1162e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1199566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
120b9b97703SBarry Smith   PetscFunctionReturn(0);
121b9b97703SBarry Smith }
122b9b97703SBarry Smith 
1230bad9183SKris Buschelman /*MC
124fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1250bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
12611a5261eSBarry Smith   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
1270bad9183SKris Buschelman 
1280bad9183SKris Buschelman   Operations provided:
12967b8a455SSatish Balay .vb
13011a5261eSBarry Smith     MatMult()
13111a5261eSBarry Smith     MatMultTranspose()
13211a5261eSBarry Smith     MatMultAdd()
13311a5261eSBarry Smith     MatMultTransposeAdd()
13467b8a455SSatish Balay .ve
1350bad9183SKris Buschelman 
1360bad9183SKris Buschelman   Level: advanced
1370bad9183SKris Buschelman 
13811a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1390bad9183SKris Buschelman M*/
1400bad9183SKris Buschelman 
1419371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) {
1424cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
14364b52464SHong Zhang   PetscMPIInt  size;
14482b1193eSBarry Smith 
14582b1193eSBarry Smith   PetscFunctionBegin;
146*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
147b0a32e0cSBarry Smith   A->data = (void *)b;
14826fbe8dcSKarl Rupp 
1499566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
15026fbe8dcSKarl Rupp 
151356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
152f4a53059SBarry Smith 
153f4259b30SLisandro Dalcin   b->AIJ  = NULL;
154cd3bbe55SBarry Smith   b->dof  = 0;
155f4259b30SLisandro Dalcin   b->OAIJ = NULL;
156f4259b30SLisandro Dalcin   b->ctx  = NULL;
157f4259b30SLisandro Dalcin   b->w    = NULL;
1589566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
15964b52464SHong Zhang   if (size == 1) {
1609566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
16164b52464SHong Zhang   } else {
1629566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
16364b52464SHong Zhang   }
16432e7c8b0SBarry Smith   A->preallocated = PETSC_TRUE;
16532e7c8b0SBarry Smith   A->assembled    = PETSC_TRUE;
16682b1193eSBarry Smith   PetscFunctionReturn(0);
16782b1193eSBarry Smith }
16882b1193eSBarry Smith 
169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
1714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
172bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
173d2840e09SBarry Smith   const PetscScalar *x, *v;
174d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
175d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
176d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
17782b1193eSBarry Smith 
178bcc973b7SBarry Smith   PetscFunctionBegin;
1799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
181bcc973b7SBarry Smith   idx = a->j;
182bcc973b7SBarry Smith   v   = a->a;
183bcc973b7SBarry Smith   ii  = a->i;
184bcc973b7SBarry Smith 
185bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
186bcc973b7SBarry Smith     jrow = ii[i];
187bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
188bcc973b7SBarry Smith     sum1 = 0.0;
189bcc973b7SBarry Smith     sum2 = 0.0;
19026fbe8dcSKarl Rupp 
191b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
192bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
193bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
194bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
195bcc973b7SBarry Smith       jrow++;
196bcc973b7SBarry Smith     }
197bcc973b7SBarry Smith     y[2 * i]     = sum1;
198bcc973b7SBarry Smith     y[2 * i + 1] = sum2;
199bcc973b7SBarry Smith   }
200bcc973b7SBarry Smith 
2019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
2029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
20482b1193eSBarry Smith   PetscFunctionReturn(0);
20582b1193eSBarry Smith }
206bcc973b7SBarry Smith 
2079371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
2084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
209bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
210d2840e09SBarry Smith   const PetscScalar *x, *v;
211d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
212d2840e09SBarry Smith   PetscInt           n, i;
213d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
21482b1193eSBarry Smith 
215bcc973b7SBarry Smith   PetscFunctionBegin;
2169566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
2179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
219bfec09a0SHong Zhang 
220bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
221bfec09a0SHong Zhang     idx    = a->j + a->i[i];
222bfec09a0SHong Zhang     v      = a->a + a->i[i];
223bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
224bcc973b7SBarry Smith     alpha1 = x[2 * i];
225bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
22626fbe8dcSKarl Rupp     while (n-- > 0) {
22726fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
22826fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2299371c9d4SSatish Balay       idx++;
2309371c9d4SSatish Balay       v++;
23126fbe8dcSKarl Rupp     }
232bcc973b7SBarry Smith   }
2339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
23682b1193eSBarry Smith   PetscFunctionReturn(0);
23782b1193eSBarry Smith }
238bcc973b7SBarry Smith 
2399371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2404cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
241bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
242d2840e09SBarry Smith   const PetscScalar *x, *v;
243d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
244b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
245d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
24682b1193eSBarry Smith 
247bcc973b7SBarry Smith   PetscFunctionBegin;
2489566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
251bcc973b7SBarry Smith   idx = a->j;
252bcc973b7SBarry Smith   v   = a->a;
253bcc973b7SBarry Smith   ii  = a->i;
254bcc973b7SBarry Smith 
255bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
256bcc973b7SBarry Smith     jrow = ii[i];
257bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
258bcc973b7SBarry Smith     sum1 = 0.0;
259bcc973b7SBarry Smith     sum2 = 0.0;
260bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
261bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
262bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
263bcc973b7SBarry Smith       jrow++;
264bcc973b7SBarry Smith     }
265bcc973b7SBarry Smith     y[2 * i] += sum1;
266bcc973b7SBarry Smith     y[2 * i + 1] += sum2;
267bcc973b7SBarry Smith   }
268bcc973b7SBarry Smith 
2699566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
272bcc973b7SBarry Smith   PetscFunctionReturn(0);
27382b1193eSBarry Smith }
2749371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2754cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
276bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
277d2840e09SBarry Smith   const PetscScalar *x, *v;
278d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
279d2840e09SBarry Smith   PetscInt           n, i;
280d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
28182b1193eSBarry Smith 
282bcc973b7SBarry Smith   PetscFunctionBegin;
2839566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
286bfec09a0SHong Zhang 
287bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
288bfec09a0SHong Zhang     idx    = a->j + a->i[i];
289bfec09a0SHong Zhang     v      = a->a + a->i[i];
290bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
291bcc973b7SBarry Smith     alpha1 = x[2 * i];
292bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
29326fbe8dcSKarl Rupp     while (n-- > 0) {
29426fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
29526fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2969371c9d4SSatish Balay       idx++;
2979371c9d4SSatish Balay       v++;
29826fbe8dcSKarl Rupp     }
299bcc973b7SBarry Smith   }
3009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
3019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
303bcc973b7SBarry Smith   PetscFunctionReturn(0);
30482b1193eSBarry Smith }
305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3069371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3074cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
308bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
309d2840e09SBarry Smith   const PetscScalar *x, *v;
310d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
311d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
312d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
31382b1193eSBarry Smith 
314bcc973b7SBarry Smith   PetscFunctionBegin;
3159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
317bcc973b7SBarry Smith   idx = a->j;
318bcc973b7SBarry Smith   v   = a->a;
319bcc973b7SBarry Smith   ii  = a->i;
320bcc973b7SBarry Smith 
321bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
322bcc973b7SBarry Smith     jrow = ii[i];
323bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
324bcc973b7SBarry Smith     sum1 = 0.0;
325bcc973b7SBarry Smith     sum2 = 0.0;
326bcc973b7SBarry Smith     sum3 = 0.0;
32726fbe8dcSKarl Rupp 
328b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
329bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
330bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
331bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
332bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
333bcc973b7SBarry Smith       jrow++;
334bcc973b7SBarry Smith     }
335bcc973b7SBarry Smith     y[3 * i]     = sum1;
336bcc973b7SBarry Smith     y[3 * i + 1] = sum2;
337bcc973b7SBarry Smith     y[3 * i + 2] = sum3;
338bcc973b7SBarry Smith   }
339bcc973b7SBarry Smith 
3409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
3419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
343bcc973b7SBarry Smith   PetscFunctionReturn(0);
344bcc973b7SBarry Smith }
345bcc973b7SBarry Smith 
3469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
348bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
349d2840e09SBarry Smith   const PetscScalar *x, *v;
350d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
351d2840e09SBarry Smith   PetscInt           n, i;
352d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
353bcc973b7SBarry Smith 
354bcc973b7SBarry Smith   PetscFunctionBegin;
3559566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
3569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
358bfec09a0SHong Zhang 
359bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
360bfec09a0SHong Zhang     idx    = a->j + a->i[i];
361bfec09a0SHong Zhang     v      = a->a + a->i[i];
362bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
363bcc973b7SBarry Smith     alpha1 = x[3 * i];
364bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
365bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
366bcc973b7SBarry Smith     while (n-- > 0) {
367bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
368bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
369bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
3709371c9d4SSatish Balay       idx++;
3719371c9d4SSatish Balay       v++;
372bcc973b7SBarry Smith     }
373bcc973b7SBarry Smith   }
3749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
3759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
377bcc973b7SBarry Smith   PetscFunctionReturn(0);
378bcc973b7SBarry Smith }
379bcc973b7SBarry Smith 
3809371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
3814cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
382bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
383d2840e09SBarry Smith   const PetscScalar *x, *v;
384d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
385b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
386d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
387bcc973b7SBarry Smith 
388bcc973b7SBarry Smith   PetscFunctionBegin;
3899566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
3909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
392bcc973b7SBarry Smith   idx = a->j;
393bcc973b7SBarry Smith   v   = a->a;
394bcc973b7SBarry Smith   ii  = a->i;
395bcc973b7SBarry Smith 
396bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
397bcc973b7SBarry Smith     jrow = ii[i];
398bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
399bcc973b7SBarry Smith     sum1 = 0.0;
400bcc973b7SBarry Smith     sum2 = 0.0;
401bcc973b7SBarry Smith     sum3 = 0.0;
402bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
403bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
404bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
405bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
406bcc973b7SBarry Smith       jrow++;
407bcc973b7SBarry Smith     }
408bcc973b7SBarry Smith     y[3 * i] += sum1;
409bcc973b7SBarry Smith     y[3 * i + 1] += sum2;
410bcc973b7SBarry Smith     y[3 * i + 2] += sum3;
411bcc973b7SBarry Smith   }
412bcc973b7SBarry Smith 
4139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
416bcc973b7SBarry Smith   PetscFunctionReturn(0);
417bcc973b7SBarry Smith }
4189371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
4194cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
420bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
421d2840e09SBarry Smith   const PetscScalar *x, *v;
422d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
423d2840e09SBarry Smith   PetscInt           n, i;
424d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
425bcc973b7SBarry Smith 
426bcc973b7SBarry Smith   PetscFunctionBegin;
4279566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
430bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
431bfec09a0SHong Zhang     idx    = a->j + a->i[i];
432bfec09a0SHong Zhang     v      = a->a + a->i[i];
433bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
434bcc973b7SBarry Smith     alpha1 = x[3 * i];
435bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
436bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
437bcc973b7SBarry Smith     while (n-- > 0) {
438bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
439bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
440bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
4419371c9d4SSatish Balay       idx++;
4429371c9d4SSatish Balay       v++;
443bcc973b7SBarry Smith     }
444bcc973b7SBarry Smith   }
4459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
448bcc973b7SBarry Smith   PetscFunctionReturn(0);
449bcc973b7SBarry Smith }
450bcc973b7SBarry Smith 
451bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4529371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
4534cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
454bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
455d2840e09SBarry Smith   const PetscScalar *x, *v;
456d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
457d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
458d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
459bcc973b7SBarry Smith 
460bcc973b7SBarry Smith   PetscFunctionBegin;
4619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4629566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
463bcc973b7SBarry Smith   idx = a->j;
464bcc973b7SBarry Smith   v   = a->a;
465bcc973b7SBarry Smith   ii  = a->i;
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
468bcc973b7SBarry Smith     jrow = ii[i];
469bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
470bcc973b7SBarry Smith     sum1 = 0.0;
471bcc973b7SBarry Smith     sum2 = 0.0;
472bcc973b7SBarry Smith     sum3 = 0.0;
473bcc973b7SBarry Smith     sum4 = 0.0;
474b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
475bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
476bcc973b7SBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
477bcc973b7SBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
478bcc973b7SBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
479bcc973b7SBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
480bcc973b7SBarry Smith       jrow++;
481bcc973b7SBarry Smith     }
482bcc973b7SBarry Smith     y[4 * i]     = sum1;
483bcc973b7SBarry Smith     y[4 * i + 1] = sum2;
484bcc973b7SBarry Smith     y[4 * i + 2] = sum3;
485bcc973b7SBarry Smith     y[4 * i + 3] = sum4;
486bcc973b7SBarry Smith   }
487bcc973b7SBarry Smith 
4889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
4899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
491bcc973b7SBarry Smith   PetscFunctionReturn(0);
492bcc973b7SBarry Smith }
493bcc973b7SBarry Smith 
4949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
4954cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
496bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
497d2840e09SBarry Smith   const PetscScalar *x, *v;
498d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
499d2840e09SBarry Smith   PetscInt           n, i;
500d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
501bcc973b7SBarry Smith 
502bcc973b7SBarry Smith   PetscFunctionBegin;
5039566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
5049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5059566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
506bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
507bfec09a0SHong Zhang     idx    = a->j + a->i[i];
508bfec09a0SHong Zhang     v      = a->a + a->i[i];
509bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
510bcc973b7SBarry Smith     alpha1 = x[4 * i];
511bcc973b7SBarry Smith     alpha2 = x[4 * i + 1];
512bcc973b7SBarry Smith     alpha3 = x[4 * i + 2];
513bcc973b7SBarry Smith     alpha4 = x[4 * i + 3];
514bcc973b7SBarry Smith     while (n-- > 0) {
515bcc973b7SBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
516bcc973b7SBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
517bcc973b7SBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
518bcc973b7SBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5199371c9d4SSatish Balay       idx++;
5209371c9d4SSatish Balay       v++;
521bcc973b7SBarry Smith     }
522bcc973b7SBarry Smith   }
5239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
526bcc973b7SBarry Smith   PetscFunctionReturn(0);
527bcc973b7SBarry Smith }
528bcc973b7SBarry Smith 
5299371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5304cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
531f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
532d2840e09SBarry Smith   const PetscScalar *x, *v;
533d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
534b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
535d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
536f9fae5adSBarry Smith 
537f9fae5adSBarry Smith   PetscFunctionBegin;
5389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
541f9fae5adSBarry Smith   idx = a->j;
542f9fae5adSBarry Smith   v   = a->a;
543f9fae5adSBarry Smith   ii  = a->i;
544f9fae5adSBarry Smith 
545f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
546f9fae5adSBarry Smith     jrow = ii[i];
547f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
548f9fae5adSBarry Smith     sum1 = 0.0;
549f9fae5adSBarry Smith     sum2 = 0.0;
550f9fae5adSBarry Smith     sum3 = 0.0;
551f9fae5adSBarry Smith     sum4 = 0.0;
552f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
553f9fae5adSBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
554f9fae5adSBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
555f9fae5adSBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
556f9fae5adSBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
557f9fae5adSBarry Smith       jrow++;
558f9fae5adSBarry Smith     }
559f9fae5adSBarry Smith     y[4 * i] += sum1;
560f9fae5adSBarry Smith     y[4 * i + 1] += sum2;
561f9fae5adSBarry Smith     y[4 * i + 2] += sum3;
562f9fae5adSBarry Smith     y[4 * i + 3] += sum4;
563f9fae5adSBarry Smith   }
564f9fae5adSBarry Smith 
5659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
568f9fae5adSBarry Smith   PetscFunctionReturn(0);
569bcc973b7SBarry Smith }
5709371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
572f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
573d2840e09SBarry Smith   const PetscScalar *x, *v;
574d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
575d2840e09SBarry Smith   PetscInt           n, i;
576d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
577f9fae5adSBarry Smith 
578f9fae5adSBarry Smith   PetscFunctionBegin;
5799566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
582bfec09a0SHong Zhang 
583f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
584bfec09a0SHong Zhang     idx    = a->j + a->i[i];
585bfec09a0SHong Zhang     v      = a->a + a->i[i];
586f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
587f9fae5adSBarry Smith     alpha1 = x[4 * i];
588f9fae5adSBarry Smith     alpha2 = x[4 * i + 1];
589f9fae5adSBarry Smith     alpha3 = x[4 * i + 2];
590f9fae5adSBarry Smith     alpha4 = x[4 * i + 3];
591f9fae5adSBarry Smith     while (n-- > 0) {
592f9fae5adSBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
593f9fae5adSBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
594f9fae5adSBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
595f9fae5adSBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5969371c9d4SSatish Balay       idx++;
5979371c9d4SSatish Balay       v++;
598f9fae5adSBarry Smith     }
599f9fae5adSBarry Smith   }
6009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
6019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
603f9fae5adSBarry Smith   PetscFunctionReturn(0);
604f9fae5adSBarry Smith }
605f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6066cd98798SBarry Smith 
6079371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
609f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
610d2840e09SBarry Smith   const PetscScalar *x, *v;
611d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
612d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
613d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
614f9fae5adSBarry Smith 
615f9fae5adSBarry Smith   PetscFunctionBegin;
6169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
618f9fae5adSBarry Smith   idx = a->j;
619f9fae5adSBarry Smith   v   = a->a;
620f9fae5adSBarry Smith   ii  = a->i;
621f9fae5adSBarry Smith 
622f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
623f9fae5adSBarry Smith     jrow = ii[i];
624f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
625f9fae5adSBarry Smith     sum1 = 0.0;
626f9fae5adSBarry Smith     sum2 = 0.0;
627f9fae5adSBarry Smith     sum3 = 0.0;
628f9fae5adSBarry Smith     sum4 = 0.0;
629f9fae5adSBarry Smith     sum5 = 0.0;
63026fbe8dcSKarl Rupp 
631b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
632f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
633f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
634f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
635f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
636f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
637f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
638f9fae5adSBarry Smith       jrow++;
639f9fae5adSBarry Smith     }
640f9fae5adSBarry Smith     y[5 * i]     = sum1;
641f9fae5adSBarry Smith     y[5 * i + 1] = sum2;
642f9fae5adSBarry Smith     y[5 * i + 2] = sum3;
643f9fae5adSBarry Smith     y[5 * i + 3] = sum4;
644f9fae5adSBarry Smith     y[5 * i + 4] = sum5;
645f9fae5adSBarry Smith   }
646f9fae5adSBarry Smith 
6479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
6489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
650f9fae5adSBarry Smith   PetscFunctionReturn(0);
651f9fae5adSBarry Smith }
652f9fae5adSBarry Smith 
6539371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6544cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
655f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656d2840e09SBarry Smith   const PetscScalar *x, *v;
657d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
658d2840e09SBarry Smith   PetscInt           n, i;
659d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
660f9fae5adSBarry Smith 
661f9fae5adSBarry Smith   PetscFunctionBegin;
6629566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
6639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
665bfec09a0SHong Zhang 
666f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
667bfec09a0SHong Zhang     idx    = a->j + a->i[i];
668bfec09a0SHong Zhang     v      = a->a + a->i[i];
669f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
670f9fae5adSBarry Smith     alpha1 = x[5 * i];
671f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
672f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
673f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
674f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
675f9fae5adSBarry Smith     while (n-- > 0) {
676f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
677f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
678f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
679f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
680f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
6819371c9d4SSatish Balay       idx++;
6829371c9d4SSatish Balay       v++;
683f9fae5adSBarry Smith     }
684f9fae5adSBarry Smith   }
6859566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
6869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
688f9fae5adSBarry Smith   PetscFunctionReturn(0);
689f9fae5adSBarry Smith }
690f9fae5adSBarry Smith 
6919371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
6924cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
693f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
694d2840e09SBarry Smith   const PetscScalar *x, *v;
695d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
696b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
697d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
698f9fae5adSBarry Smith 
699f9fae5adSBarry Smith   PetscFunctionBegin;
7009566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
703f9fae5adSBarry Smith   idx = a->j;
704f9fae5adSBarry Smith   v   = a->a;
705f9fae5adSBarry Smith   ii  = a->i;
706f9fae5adSBarry Smith 
707f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
708f9fae5adSBarry Smith     jrow = ii[i];
709f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
710f9fae5adSBarry Smith     sum1 = 0.0;
711f9fae5adSBarry Smith     sum2 = 0.0;
712f9fae5adSBarry Smith     sum3 = 0.0;
713f9fae5adSBarry Smith     sum4 = 0.0;
714f9fae5adSBarry Smith     sum5 = 0.0;
715f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
716f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
717f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
718f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
719f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
720f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
721f9fae5adSBarry Smith       jrow++;
722f9fae5adSBarry Smith     }
723f9fae5adSBarry Smith     y[5 * i] += sum1;
724f9fae5adSBarry Smith     y[5 * i + 1] += sum2;
725f9fae5adSBarry Smith     y[5 * i + 2] += sum3;
726f9fae5adSBarry Smith     y[5 * i + 3] += sum4;
727f9fae5adSBarry Smith     y[5 * i + 4] += sum5;
728f9fae5adSBarry Smith   }
729f9fae5adSBarry Smith 
7309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
733f9fae5adSBarry Smith   PetscFunctionReturn(0);
734f9fae5adSBarry Smith }
735f9fae5adSBarry Smith 
7369371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
7374cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
738f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
739d2840e09SBarry Smith   const PetscScalar *x, *v;
740d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
741d2840e09SBarry Smith   PetscInt           n, i;
742d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
743f9fae5adSBarry Smith 
744f9fae5adSBarry Smith   PetscFunctionBegin;
7459566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
748bfec09a0SHong Zhang 
749f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
750bfec09a0SHong Zhang     idx    = a->j + a->i[i];
751bfec09a0SHong Zhang     v      = a->a + a->i[i];
752f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
753f9fae5adSBarry Smith     alpha1 = x[5 * i];
754f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
755f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
756f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
757f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
758f9fae5adSBarry Smith     while (n-- > 0) {
759f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
760f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
761f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
762f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
763f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
7649371c9d4SSatish Balay       idx++;
7659371c9d4SSatish Balay       v++;
766f9fae5adSBarry Smith     }
767f9fae5adSBarry Smith   }
7689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
771f9fae5adSBarry Smith   PetscFunctionReturn(0);
772bcc973b7SBarry Smith }
773bcc973b7SBarry Smith 
7746cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7759371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
7766cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
7776cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
778d2840e09SBarry Smith   const PetscScalar *x, *v;
779d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
780d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
781d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
7826cd98798SBarry Smith 
7836cd98798SBarry Smith   PetscFunctionBegin;
7849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
7866cd98798SBarry Smith   idx = a->j;
7876cd98798SBarry Smith   v   = a->a;
7886cd98798SBarry Smith   ii  = a->i;
7896cd98798SBarry Smith 
7906cd98798SBarry Smith   for (i = 0; i < m; i++) {
7916cd98798SBarry Smith     jrow = ii[i];
7926cd98798SBarry Smith     n    = ii[i + 1] - jrow;
7936cd98798SBarry Smith     sum1 = 0.0;
7946cd98798SBarry Smith     sum2 = 0.0;
7956cd98798SBarry Smith     sum3 = 0.0;
7966cd98798SBarry Smith     sum4 = 0.0;
7976cd98798SBarry Smith     sum5 = 0.0;
7986cd98798SBarry Smith     sum6 = 0.0;
79926fbe8dcSKarl Rupp 
800b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
8016cd98798SBarry Smith     for (j = 0; j < n; j++) {
8026cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8036cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8046cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8056cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8066cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8076cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8086cd98798SBarry Smith       jrow++;
8096cd98798SBarry Smith     }
8106cd98798SBarry Smith     y[6 * i]     = sum1;
8116cd98798SBarry Smith     y[6 * i + 1] = sum2;
8126cd98798SBarry Smith     y[6 * i + 2] = sum3;
8136cd98798SBarry Smith     y[6 * i + 3] = sum4;
8146cd98798SBarry Smith     y[6 * i + 4] = sum5;
8156cd98798SBarry Smith     y[6 * i + 5] = sum6;
8166cd98798SBarry Smith   }
8176cd98798SBarry Smith 
8189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
8199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8216cd98798SBarry Smith   PetscFunctionReturn(0);
8226cd98798SBarry Smith }
8236cd98798SBarry Smith 
8249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
8256cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8266cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
827d2840e09SBarry Smith   const PetscScalar *x, *v;
828d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
829d2840e09SBarry Smith   PetscInt           n, i;
830d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
8316cd98798SBarry Smith 
8326cd98798SBarry Smith   PetscFunctionBegin;
8339566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
8349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
836bfec09a0SHong Zhang 
8376cd98798SBarry Smith   for (i = 0; i < m; i++) {
838bfec09a0SHong Zhang     idx    = a->j + a->i[i];
839bfec09a0SHong Zhang     v      = a->a + a->i[i];
8406cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
8416cd98798SBarry Smith     alpha1 = x[6 * i];
8426cd98798SBarry Smith     alpha2 = x[6 * i + 1];
8436cd98798SBarry Smith     alpha3 = x[6 * i + 2];
8446cd98798SBarry Smith     alpha4 = x[6 * i + 3];
8456cd98798SBarry Smith     alpha5 = x[6 * i + 4];
8466cd98798SBarry Smith     alpha6 = x[6 * i + 5];
8476cd98798SBarry Smith     while (n-- > 0) {
8486cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
8496cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
8506cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
8516cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
8526cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
8536cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
8549371c9d4SSatish Balay       idx++;
8559371c9d4SSatish Balay       v++;
8566cd98798SBarry Smith     }
8576cd98798SBarry Smith   }
8589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
8599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8616cd98798SBarry Smith   PetscFunctionReturn(0);
8626cd98798SBarry Smith }
8636cd98798SBarry Smith 
8649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
8656cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8666cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
867d2840e09SBarry Smith   const PetscScalar *x, *v;
868d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
869b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
870d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
8716cd98798SBarry Smith 
8726cd98798SBarry Smith   PetscFunctionBegin;
8739566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
8749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
8766cd98798SBarry Smith   idx = a->j;
8776cd98798SBarry Smith   v   = a->a;
8786cd98798SBarry Smith   ii  = a->i;
8796cd98798SBarry Smith 
8806cd98798SBarry Smith   for (i = 0; i < m; i++) {
8816cd98798SBarry Smith     jrow = ii[i];
8826cd98798SBarry Smith     n    = ii[i + 1] - jrow;
8836cd98798SBarry Smith     sum1 = 0.0;
8846cd98798SBarry Smith     sum2 = 0.0;
8856cd98798SBarry Smith     sum3 = 0.0;
8866cd98798SBarry Smith     sum4 = 0.0;
8876cd98798SBarry Smith     sum5 = 0.0;
8886cd98798SBarry Smith     sum6 = 0.0;
8896cd98798SBarry Smith     for (j = 0; j < n; j++) {
8906cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8916cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8926cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8936cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8946cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8956cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8966cd98798SBarry Smith       jrow++;
8976cd98798SBarry Smith     }
8986cd98798SBarry Smith     y[6 * i] += sum1;
8996cd98798SBarry Smith     y[6 * i + 1] += sum2;
9006cd98798SBarry Smith     y[6 * i + 2] += sum3;
9016cd98798SBarry Smith     y[6 * i + 3] += sum4;
9026cd98798SBarry Smith     y[6 * i + 4] += sum5;
9036cd98798SBarry Smith     y[6 * i + 5] += sum6;
9046cd98798SBarry Smith   }
9056cd98798SBarry Smith 
9069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9096cd98798SBarry Smith   PetscFunctionReturn(0);
9106cd98798SBarry Smith }
9116cd98798SBarry Smith 
9129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
9136cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
9146cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
915d2840e09SBarry Smith   const PetscScalar *x, *v;
916d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
917d2840e09SBarry Smith   PetscInt           n, i;
918d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
9196cd98798SBarry Smith 
9206cd98798SBarry Smith   PetscFunctionBegin;
9219566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
9229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
924bfec09a0SHong Zhang 
9256cd98798SBarry Smith   for (i = 0; i < m; i++) {
926bfec09a0SHong Zhang     idx    = a->j + a->i[i];
927bfec09a0SHong Zhang     v      = a->a + a->i[i];
9286cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
9296cd98798SBarry Smith     alpha1 = x[6 * i];
9306cd98798SBarry Smith     alpha2 = x[6 * i + 1];
9316cd98798SBarry Smith     alpha3 = x[6 * i + 2];
9326cd98798SBarry Smith     alpha4 = x[6 * i + 3];
9336cd98798SBarry Smith     alpha5 = x[6 * i + 4];
9346cd98798SBarry Smith     alpha6 = x[6 * i + 5];
9356cd98798SBarry Smith     while (n-- > 0) {
9366cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
9376cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
9386cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
9396cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
9406cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
9416cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
9429371c9d4SSatish Balay       idx++;
9439371c9d4SSatish Balay       v++;
9446cd98798SBarry Smith     }
9456cd98798SBarry Smith   }
9469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9496cd98798SBarry Smith   PetscFunctionReturn(0);
9506cd98798SBarry Smith }
9516cd98798SBarry Smith 
95266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
9539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
954ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
955ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
956d2840e09SBarry Smith   const PetscScalar *x, *v;
957d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
958d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
959d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
960ed8eea03SBarry Smith 
961ed8eea03SBarry Smith   PetscFunctionBegin;
9629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
964ed8eea03SBarry Smith   idx = a->j;
965ed8eea03SBarry Smith   v   = a->a;
966ed8eea03SBarry Smith   ii  = a->i;
967ed8eea03SBarry Smith 
968ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
969ed8eea03SBarry Smith     jrow = ii[i];
970ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
971ed8eea03SBarry Smith     sum1 = 0.0;
972ed8eea03SBarry Smith     sum2 = 0.0;
973ed8eea03SBarry Smith     sum3 = 0.0;
974ed8eea03SBarry Smith     sum4 = 0.0;
975ed8eea03SBarry Smith     sum5 = 0.0;
976ed8eea03SBarry Smith     sum6 = 0.0;
977ed8eea03SBarry Smith     sum7 = 0.0;
97826fbe8dcSKarl Rupp 
979b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
980ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
981ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
982ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
983ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
984ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
985ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
986ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
987ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
988ed8eea03SBarry Smith       jrow++;
989ed8eea03SBarry Smith     }
990ed8eea03SBarry Smith     y[7 * i]     = sum1;
991ed8eea03SBarry Smith     y[7 * i + 1] = sum2;
992ed8eea03SBarry Smith     y[7 * i + 2] = sum3;
993ed8eea03SBarry Smith     y[7 * i + 3] = sum4;
994ed8eea03SBarry Smith     y[7 * i + 4] = sum5;
995ed8eea03SBarry Smith     y[7 * i + 5] = sum6;
996ed8eea03SBarry Smith     y[7 * i + 6] = sum7;
997ed8eea03SBarry Smith   }
998ed8eea03SBarry Smith 
9999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
10009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1002ed8eea03SBarry Smith   PetscFunctionReturn(0);
1003ed8eea03SBarry Smith }
1004ed8eea03SBarry Smith 
10059371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
1006ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1007ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1008d2840e09SBarry Smith   const PetscScalar *x, *v;
1009d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1010d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1011d2840e09SBarry Smith   PetscInt           n, i;
1012ed8eea03SBarry Smith 
1013ed8eea03SBarry Smith   PetscFunctionBegin;
10149566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
10159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1017ed8eea03SBarry Smith 
1018ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1019ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1020ed8eea03SBarry Smith     v      = a->a + a->i[i];
1021ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1022ed8eea03SBarry Smith     alpha1 = x[7 * i];
1023ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1024ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1025ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1026ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1027ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1028ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1029ed8eea03SBarry Smith     while (n-- > 0) {
1030ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1031ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1032ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1033ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1034ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1035ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1036ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
10379371c9d4SSatish Balay       idx++;
10389371c9d4SSatish Balay       v++;
1039ed8eea03SBarry Smith     }
1040ed8eea03SBarry Smith   }
10419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1044ed8eea03SBarry Smith   PetscFunctionReturn(0);
1045ed8eea03SBarry Smith }
1046ed8eea03SBarry Smith 
10479371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1048ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1049ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1050d2840e09SBarry Smith   const PetscScalar *x, *v;
1051d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1052d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1053ed8eea03SBarry Smith   PetscInt           n, i, jrow, j;
1054ed8eea03SBarry Smith 
1055ed8eea03SBarry Smith   PetscFunctionBegin;
10569566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
10579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1059ed8eea03SBarry Smith   idx = a->j;
1060ed8eea03SBarry Smith   v   = a->a;
1061ed8eea03SBarry Smith   ii  = a->i;
1062ed8eea03SBarry Smith 
1063ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1064ed8eea03SBarry Smith     jrow = ii[i];
1065ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
1066ed8eea03SBarry Smith     sum1 = 0.0;
1067ed8eea03SBarry Smith     sum2 = 0.0;
1068ed8eea03SBarry Smith     sum3 = 0.0;
1069ed8eea03SBarry Smith     sum4 = 0.0;
1070ed8eea03SBarry Smith     sum5 = 0.0;
1071ed8eea03SBarry Smith     sum6 = 0.0;
1072ed8eea03SBarry Smith     sum7 = 0.0;
1073ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
1074ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
1075ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1076ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1077ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1078ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1079ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1080ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1081ed8eea03SBarry Smith       jrow++;
1082ed8eea03SBarry Smith     }
1083ed8eea03SBarry Smith     y[7 * i] += sum1;
1084ed8eea03SBarry Smith     y[7 * i + 1] += sum2;
1085ed8eea03SBarry Smith     y[7 * i + 2] += sum3;
1086ed8eea03SBarry Smith     y[7 * i + 3] += sum4;
1087ed8eea03SBarry Smith     y[7 * i + 4] += sum5;
1088ed8eea03SBarry Smith     y[7 * i + 5] += sum6;
1089ed8eea03SBarry Smith     y[7 * i + 6] += sum7;
1090ed8eea03SBarry Smith   }
1091ed8eea03SBarry Smith 
10929566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1095ed8eea03SBarry Smith   PetscFunctionReturn(0);
1096ed8eea03SBarry Smith }
1097ed8eea03SBarry Smith 
10989371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1099ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1100ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1101d2840e09SBarry Smith   const PetscScalar *x, *v;
1102d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1103d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1104d2840e09SBarry Smith   PetscInt           n, i;
1105ed8eea03SBarry Smith 
1106ed8eea03SBarry Smith   PetscFunctionBegin;
11079566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
11089566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1110ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1111ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1112ed8eea03SBarry Smith     v      = a->a + a->i[i];
1113ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1114ed8eea03SBarry Smith     alpha1 = x[7 * i];
1115ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1116ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1117ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1118ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1119ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1120ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1121ed8eea03SBarry Smith     while (n-- > 0) {
1122ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1123ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1124ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1125ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1126ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1127ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1128ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
11299371c9d4SSatish Balay       idx++;
11309371c9d4SSatish Balay       v++;
1131ed8eea03SBarry Smith     }
1132ed8eea03SBarry Smith   }
11339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1136ed8eea03SBarry Smith   PetscFunctionReturn(0);
1137ed8eea03SBarry Smith }
1138ed8eea03SBarry Smith 
11399371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
114066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
114166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1142d2840e09SBarry Smith   const PetscScalar *x, *v;
1143d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1144d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1145d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
114666ed3db0SBarry Smith 
114766ed3db0SBarry Smith   PetscFunctionBegin;
11489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
115066ed3db0SBarry Smith   idx = a->j;
115166ed3db0SBarry Smith   v   = a->a;
115266ed3db0SBarry Smith   ii  = a->i;
115366ed3db0SBarry Smith 
115466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
115566ed3db0SBarry Smith     jrow = ii[i];
115666ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
115766ed3db0SBarry Smith     sum1 = 0.0;
115866ed3db0SBarry Smith     sum2 = 0.0;
115966ed3db0SBarry Smith     sum3 = 0.0;
116066ed3db0SBarry Smith     sum4 = 0.0;
116166ed3db0SBarry Smith     sum5 = 0.0;
116266ed3db0SBarry Smith     sum6 = 0.0;
116366ed3db0SBarry Smith     sum7 = 0.0;
116466ed3db0SBarry Smith     sum8 = 0.0;
116526fbe8dcSKarl Rupp 
1166b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
116766ed3db0SBarry Smith     for (j = 0; j < n; j++) {
116866ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
116966ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
117066ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
117166ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
117266ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
117366ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
117466ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
117566ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
117666ed3db0SBarry Smith       jrow++;
117766ed3db0SBarry Smith     }
117866ed3db0SBarry Smith     y[8 * i]     = sum1;
117966ed3db0SBarry Smith     y[8 * i + 1] = sum2;
118066ed3db0SBarry Smith     y[8 * i + 2] = sum3;
118166ed3db0SBarry Smith     y[8 * i + 3] = sum4;
118266ed3db0SBarry Smith     y[8 * i + 4] = sum5;
118366ed3db0SBarry Smith     y[8 * i + 5] = sum6;
118466ed3db0SBarry Smith     y[8 * i + 6] = sum7;
118566ed3db0SBarry Smith     y[8 * i + 7] = sum8;
118666ed3db0SBarry Smith   }
118766ed3db0SBarry Smith 
11889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
11899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
119166ed3db0SBarry Smith   PetscFunctionReturn(0);
119266ed3db0SBarry Smith }
119366ed3db0SBarry Smith 
11949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
119566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
119666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1197d2840e09SBarry Smith   const PetscScalar *x, *v;
1198d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1199d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1200d2840e09SBarry Smith   PetscInt           n, i;
120166ed3db0SBarry Smith 
120266ed3db0SBarry Smith   PetscFunctionBegin;
12039566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
12049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12059566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1206bfec09a0SHong Zhang 
120766ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1208bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1209bfec09a0SHong Zhang     v      = a->a + a->i[i];
121066ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
121166ed3db0SBarry Smith     alpha1 = x[8 * i];
121266ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
121366ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
121466ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
121566ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
121666ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
121766ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
121866ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
121966ed3db0SBarry Smith     while (n-- > 0) {
122066ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
122166ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
122266ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
122366ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
122466ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
122566ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
122666ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
122766ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
12289371c9d4SSatish Balay       idx++;
12299371c9d4SSatish Balay       v++;
123066ed3db0SBarry Smith     }
123166ed3db0SBarry Smith   }
12329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
123566ed3db0SBarry Smith   PetscFunctionReturn(0);
123666ed3db0SBarry Smith }
123766ed3db0SBarry Smith 
12389371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
123966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
124066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1241d2840e09SBarry Smith   const PetscScalar *x, *v;
1242d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1243d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1244b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
124566ed3db0SBarry Smith 
124666ed3db0SBarry Smith   PetscFunctionBegin;
12479566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
12489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
125066ed3db0SBarry Smith   idx = a->j;
125166ed3db0SBarry Smith   v   = a->a;
125266ed3db0SBarry Smith   ii  = a->i;
125366ed3db0SBarry Smith 
125466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
125566ed3db0SBarry Smith     jrow = ii[i];
125666ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
125766ed3db0SBarry Smith     sum1 = 0.0;
125866ed3db0SBarry Smith     sum2 = 0.0;
125966ed3db0SBarry Smith     sum3 = 0.0;
126066ed3db0SBarry Smith     sum4 = 0.0;
126166ed3db0SBarry Smith     sum5 = 0.0;
126266ed3db0SBarry Smith     sum6 = 0.0;
126366ed3db0SBarry Smith     sum7 = 0.0;
126466ed3db0SBarry Smith     sum8 = 0.0;
126566ed3db0SBarry Smith     for (j = 0; j < n; j++) {
126666ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
126766ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
126866ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
126966ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
127066ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
127166ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
127266ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
127366ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
127466ed3db0SBarry Smith       jrow++;
127566ed3db0SBarry Smith     }
127666ed3db0SBarry Smith     y[8 * i] += sum1;
127766ed3db0SBarry Smith     y[8 * i + 1] += sum2;
127866ed3db0SBarry Smith     y[8 * i + 2] += sum3;
127966ed3db0SBarry Smith     y[8 * i + 3] += sum4;
128066ed3db0SBarry Smith     y[8 * i + 4] += sum5;
128166ed3db0SBarry Smith     y[8 * i + 5] += sum6;
128266ed3db0SBarry Smith     y[8 * i + 6] += sum7;
128366ed3db0SBarry Smith     y[8 * i + 7] += sum8;
128466ed3db0SBarry Smith   }
128566ed3db0SBarry Smith 
12869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
128966ed3db0SBarry Smith   PetscFunctionReturn(0);
129066ed3db0SBarry Smith }
129166ed3db0SBarry Smith 
12929371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
129366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
129466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1295d2840e09SBarry Smith   const PetscScalar *x, *v;
1296d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1297d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1298d2840e09SBarry Smith   PetscInt           n, i;
129966ed3db0SBarry Smith 
130066ed3db0SBarry Smith   PetscFunctionBegin;
13019566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
13029566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13039566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
130466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1305bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1306bfec09a0SHong Zhang     v      = a->a + a->i[i];
130766ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
130866ed3db0SBarry Smith     alpha1 = x[8 * i];
130966ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
131066ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
131166ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
131266ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
131366ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
131466ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
131566ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
131666ed3db0SBarry Smith     while (n-- > 0) {
131766ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
131866ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
131966ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
132066ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
132166ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
132266ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
132366ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
132466ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
13259371c9d4SSatish Balay       idx++;
13269371c9d4SSatish Balay       v++;
132766ed3db0SBarry Smith     }
132866ed3db0SBarry Smith   }
13299566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
13322f7816d4SBarry Smith   PetscFunctionReturn(0);
13332f7816d4SBarry Smith }
13342f7816d4SBarry Smith 
13352b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13369371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
13372b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13382b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1339d2840e09SBarry Smith   const PetscScalar *x, *v;
1340d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1341d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1342d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
13432b692628SMatthew Knepley 
13442b692628SMatthew Knepley   PetscFunctionBegin;
13459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
13472b692628SMatthew Knepley   idx = a->j;
13482b692628SMatthew Knepley   v   = a->a;
13492b692628SMatthew Knepley   ii  = a->i;
13502b692628SMatthew Knepley 
13512b692628SMatthew Knepley   for (i = 0; i < m; i++) {
13522b692628SMatthew Knepley     jrow = ii[i];
13532b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
13542b692628SMatthew Knepley     sum1 = 0.0;
13552b692628SMatthew Knepley     sum2 = 0.0;
13562b692628SMatthew Knepley     sum3 = 0.0;
13572b692628SMatthew Knepley     sum4 = 0.0;
13582b692628SMatthew Knepley     sum5 = 0.0;
13592b692628SMatthew Knepley     sum6 = 0.0;
13602b692628SMatthew Knepley     sum7 = 0.0;
13612b692628SMatthew Knepley     sum8 = 0.0;
13622b692628SMatthew Knepley     sum9 = 0.0;
136326fbe8dcSKarl Rupp 
1364b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
13652b692628SMatthew Knepley     for (j = 0; j < n; j++) {
13662b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
13672b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
13682b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
13692b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
13702b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
13712b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
13722b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
13732b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
13742b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
13752b692628SMatthew Knepley       jrow++;
13762b692628SMatthew Knepley     }
13772b692628SMatthew Knepley     y[9 * i]     = sum1;
13782b692628SMatthew Knepley     y[9 * i + 1] = sum2;
13792b692628SMatthew Knepley     y[9 * i + 2] = sum3;
13802b692628SMatthew Knepley     y[9 * i + 3] = sum4;
13812b692628SMatthew Knepley     y[9 * i + 4] = sum5;
13822b692628SMatthew Knepley     y[9 * i + 5] = sum6;
13832b692628SMatthew Knepley     y[9 * i + 6] = sum7;
13842b692628SMatthew Knepley     y[9 * i + 7] = sum8;
13852b692628SMatthew Knepley     y[9 * i + 8] = sum9;
13862b692628SMatthew Knepley   }
13872b692628SMatthew Knepley 
13889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
13899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
13912b692628SMatthew Knepley   PetscFunctionReturn(0);
13922b692628SMatthew Knepley }
13932b692628SMatthew Knepley 
1394b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1395b9cda22cSBarry Smith 
13969371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
13972b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13982b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1399d2840e09SBarry Smith   const PetscScalar *x, *v;
1400d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1401d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1402d2840e09SBarry Smith   PetscInt           n, i;
14032b692628SMatthew Knepley 
14042b692628SMatthew Knepley   PetscFunctionBegin;
14059566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
14069566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14079566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
14082b692628SMatthew Knepley 
14092b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14102b692628SMatthew Knepley     idx    = a->j + a->i[i];
14112b692628SMatthew Knepley     v      = a->a + a->i[i];
14122b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
14132b692628SMatthew Knepley     alpha1 = x[9 * i];
14142b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
14152b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
14162b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
14172b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
14182b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
14192b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
14202b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
14212f6bd0c6SMatthew Knepley     alpha9 = x[9 * i + 8];
14222b692628SMatthew Knepley     while (n-- > 0) {
14232b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
14242b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
14252b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
14262b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
14272b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
14282b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
14292b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
14302b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
14312b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
14329371c9d4SSatish Balay       idx++;
14339371c9d4SSatish Balay       v++;
14342b692628SMatthew Knepley     }
14352b692628SMatthew Knepley   }
14369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14392b692628SMatthew Knepley   PetscFunctionReturn(0);
14402b692628SMatthew Knepley }
14412b692628SMatthew Knepley 
14429371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
14432b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14442b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1445d2840e09SBarry Smith   const PetscScalar *x, *v;
1446d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1447d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1448b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
14492b692628SMatthew Knepley 
14502b692628SMatthew Knepley   PetscFunctionBegin;
14519566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
14529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
14542b692628SMatthew Knepley   idx = a->j;
14552b692628SMatthew Knepley   v   = a->a;
14562b692628SMatthew Knepley   ii  = a->i;
14572b692628SMatthew Knepley 
14582b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14592b692628SMatthew Knepley     jrow = ii[i];
14602b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
14612b692628SMatthew Knepley     sum1 = 0.0;
14622b692628SMatthew Knepley     sum2 = 0.0;
14632b692628SMatthew Knepley     sum3 = 0.0;
14642b692628SMatthew Knepley     sum4 = 0.0;
14652b692628SMatthew Knepley     sum5 = 0.0;
14662b692628SMatthew Knepley     sum6 = 0.0;
14672b692628SMatthew Knepley     sum7 = 0.0;
14682b692628SMatthew Knepley     sum8 = 0.0;
14692b692628SMatthew Knepley     sum9 = 0.0;
14702b692628SMatthew Knepley     for (j = 0; j < n; j++) {
14712b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
14722b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
14732b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
14742b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
14752b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
14762b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
14772b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
14782b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
14792b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
14802b692628SMatthew Knepley       jrow++;
14812b692628SMatthew Knepley     }
14822b692628SMatthew Knepley     y[9 * i] += sum1;
14832b692628SMatthew Knepley     y[9 * i + 1] += sum2;
14842b692628SMatthew Knepley     y[9 * i + 2] += sum3;
14852b692628SMatthew Knepley     y[9 * i + 3] += sum4;
14862b692628SMatthew Knepley     y[9 * i + 4] += sum5;
14872b692628SMatthew Knepley     y[9 * i + 5] += sum6;
14882b692628SMatthew Knepley     y[9 * i + 6] += sum7;
14892b692628SMatthew Knepley     y[9 * i + 7] += sum8;
14902b692628SMatthew Knepley     y[9 * i + 8] += sum9;
14912b692628SMatthew Knepley   }
14922b692628SMatthew Knepley 
14939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
14962b692628SMatthew Knepley   PetscFunctionReturn(0);
14972b692628SMatthew Knepley }
14982b692628SMatthew Knepley 
14999371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
15002b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
15012b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1502d2840e09SBarry Smith   const PetscScalar *x, *v;
1503d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1504d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1505d2840e09SBarry Smith   PetscInt           n, i;
15062b692628SMatthew Knepley 
15072b692628SMatthew Knepley   PetscFunctionBegin;
15089566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
15099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
15112b692628SMatthew Knepley   for (i = 0; i < m; i++) {
15122b692628SMatthew Knepley     idx    = a->j + a->i[i];
15132b692628SMatthew Knepley     v      = a->a + a->i[i];
15142b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
15152b692628SMatthew Knepley     alpha1 = x[9 * i];
15162b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
15172b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
15182b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
15192b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
15202b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
15212b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
15222b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
15232b692628SMatthew Knepley     alpha9 = x[9 * i + 8];
15242b692628SMatthew Knepley     while (n-- > 0) {
15252b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
15262b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
15272b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
15282b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
15292b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
15302b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
15312b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
15322b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
15332b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
15349371c9d4SSatish Balay       idx++;
15359371c9d4SSatish Balay       v++;
15362b692628SMatthew Knepley     }
15372b692628SMatthew Knepley   }
15389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15412b692628SMatthew Knepley   PetscFunctionReturn(0);
15422b692628SMatthew Knepley }
15439371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1544b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1545b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1546d2840e09SBarry Smith   const PetscScalar *x, *v;
1547d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1548d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1549d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1550b9cda22cSBarry Smith 
1551b9cda22cSBarry Smith   PetscFunctionBegin;
15529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1554b9cda22cSBarry Smith   idx = a->j;
1555b9cda22cSBarry Smith   v   = a->a;
1556b9cda22cSBarry Smith   ii  = a->i;
1557b9cda22cSBarry Smith 
1558b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1559b9cda22cSBarry Smith     jrow  = ii[i];
1560b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1561b9cda22cSBarry Smith     sum1  = 0.0;
1562b9cda22cSBarry Smith     sum2  = 0.0;
1563b9cda22cSBarry Smith     sum3  = 0.0;
1564b9cda22cSBarry Smith     sum4  = 0.0;
1565b9cda22cSBarry Smith     sum5  = 0.0;
1566b9cda22cSBarry Smith     sum6  = 0.0;
1567b9cda22cSBarry Smith     sum7  = 0.0;
1568b9cda22cSBarry Smith     sum8  = 0.0;
1569b9cda22cSBarry Smith     sum9  = 0.0;
1570b9cda22cSBarry Smith     sum10 = 0.0;
157126fbe8dcSKarl Rupp 
1572b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
1573b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1574b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1575b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1576b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1577b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1578b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1579b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1580b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1581b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1582b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1583b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1584b9cda22cSBarry Smith       jrow++;
1585b9cda22cSBarry Smith     }
1586b9cda22cSBarry Smith     y[10 * i]     = sum1;
1587b9cda22cSBarry Smith     y[10 * i + 1] = sum2;
1588b9cda22cSBarry Smith     y[10 * i + 2] = sum3;
1589b9cda22cSBarry Smith     y[10 * i + 3] = sum4;
1590b9cda22cSBarry Smith     y[10 * i + 4] = sum5;
1591b9cda22cSBarry Smith     y[10 * i + 5] = sum6;
1592b9cda22cSBarry Smith     y[10 * i + 6] = sum7;
1593b9cda22cSBarry Smith     y[10 * i + 7] = sum8;
1594b9cda22cSBarry Smith     y[10 * i + 8] = sum9;
1595b9cda22cSBarry Smith     y[10 * i + 9] = sum10;
1596b9cda22cSBarry Smith   }
1597b9cda22cSBarry Smith 
15989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
15999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1601b9cda22cSBarry Smith   PetscFunctionReturn(0);
1602b9cda22cSBarry Smith }
1603b9cda22cSBarry Smith 
16049371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1605b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1606b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1607d2840e09SBarry Smith   const PetscScalar *x, *v;
1608d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1610b9cda22cSBarry Smith   PetscInt           n, i, jrow, j;
1611b9cda22cSBarry Smith 
1612b9cda22cSBarry Smith   PetscFunctionBegin;
16139566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
16149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1616b9cda22cSBarry Smith   idx = a->j;
1617b9cda22cSBarry Smith   v   = a->a;
1618b9cda22cSBarry Smith   ii  = a->i;
1619b9cda22cSBarry Smith 
1620b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1621b9cda22cSBarry Smith     jrow  = ii[i];
1622b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1623b9cda22cSBarry Smith     sum1  = 0.0;
1624b9cda22cSBarry Smith     sum2  = 0.0;
1625b9cda22cSBarry Smith     sum3  = 0.0;
1626b9cda22cSBarry Smith     sum4  = 0.0;
1627b9cda22cSBarry Smith     sum5  = 0.0;
1628b9cda22cSBarry Smith     sum6  = 0.0;
1629b9cda22cSBarry Smith     sum7  = 0.0;
1630b9cda22cSBarry Smith     sum8  = 0.0;
1631b9cda22cSBarry Smith     sum9  = 0.0;
1632b9cda22cSBarry Smith     sum10 = 0.0;
1633b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1634b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1635b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1636b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1637b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1638b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1639b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1640b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1641b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1642b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1643b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1644b9cda22cSBarry Smith       jrow++;
1645b9cda22cSBarry Smith     }
1646b9cda22cSBarry Smith     y[10 * i] += sum1;
1647b9cda22cSBarry Smith     y[10 * i + 1] += sum2;
1648b9cda22cSBarry Smith     y[10 * i + 2] += sum3;
1649b9cda22cSBarry Smith     y[10 * i + 3] += sum4;
1650b9cda22cSBarry Smith     y[10 * i + 4] += sum5;
1651b9cda22cSBarry Smith     y[10 * i + 5] += sum6;
1652b9cda22cSBarry Smith     y[10 * i + 6] += sum7;
1653b9cda22cSBarry Smith     y[10 * i + 7] += sum8;
1654b9cda22cSBarry Smith     y[10 * i + 8] += sum9;
1655b9cda22cSBarry Smith     y[10 * i + 9] += sum10;
1656b9cda22cSBarry Smith   }
1657b9cda22cSBarry Smith 
16589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
16599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1661b9cda22cSBarry Smith   PetscFunctionReturn(0);
1662b9cda22cSBarry Smith }
1663b9cda22cSBarry Smith 
16649371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1665b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1666b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1667d2840e09SBarry Smith   const PetscScalar *x, *v;
1668d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1669d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1670d2840e09SBarry Smith   PetscInt           n, i;
1671b9cda22cSBarry Smith 
1672b9cda22cSBarry Smith   PetscFunctionBegin;
16739566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
16749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1676b9cda22cSBarry Smith 
1677b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1678b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1679b9cda22cSBarry Smith     v       = a->a + a->i[i];
1680b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1681e29fdc22SBarry Smith     alpha1  = x[10 * i];
1682e29fdc22SBarry Smith     alpha2  = x[10 * i + 1];
1683e29fdc22SBarry Smith     alpha3  = x[10 * i + 2];
1684e29fdc22SBarry Smith     alpha4  = x[10 * i + 3];
1685e29fdc22SBarry Smith     alpha5  = x[10 * i + 4];
1686e29fdc22SBarry Smith     alpha6  = x[10 * i + 5];
1687e29fdc22SBarry Smith     alpha7  = x[10 * i + 6];
1688e29fdc22SBarry Smith     alpha8  = x[10 * i + 7];
1689e29fdc22SBarry Smith     alpha9  = x[10 * i + 8];
1690b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1691b9cda22cSBarry Smith     while (n-- > 0) {
1692e29fdc22SBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1693e29fdc22SBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1694e29fdc22SBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1695e29fdc22SBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1696e29fdc22SBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1697e29fdc22SBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1698e29fdc22SBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1699e29fdc22SBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1700e29fdc22SBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1701b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17029371c9d4SSatish Balay       idx++;
17039371c9d4SSatish Balay       v++;
1704b9cda22cSBarry Smith     }
1705b9cda22cSBarry Smith   }
17069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1709b9cda22cSBarry Smith   PetscFunctionReturn(0);
1710b9cda22cSBarry Smith }
1711b9cda22cSBarry Smith 
17129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1713b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1714b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1715d2840e09SBarry Smith   const PetscScalar *x, *v;
1716d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1717d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1718d2840e09SBarry Smith   PetscInt           n, i;
1719b9cda22cSBarry Smith 
1720b9cda22cSBarry Smith   PetscFunctionBegin;
17219566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
17229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1724b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1725b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1726b9cda22cSBarry Smith     v       = a->a + a->i[i];
1727b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1728b9cda22cSBarry Smith     alpha1  = x[10 * i];
1729b9cda22cSBarry Smith     alpha2  = x[10 * i + 1];
1730b9cda22cSBarry Smith     alpha3  = x[10 * i + 2];
1731b9cda22cSBarry Smith     alpha4  = x[10 * i + 3];
1732b9cda22cSBarry Smith     alpha5  = x[10 * i + 4];
1733b9cda22cSBarry Smith     alpha6  = x[10 * i + 5];
1734b9cda22cSBarry Smith     alpha7  = x[10 * i + 6];
1735b9cda22cSBarry Smith     alpha8  = x[10 * i + 7];
1736b9cda22cSBarry Smith     alpha9  = x[10 * i + 8];
1737b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1738b9cda22cSBarry Smith     while (n-- > 0) {
1739b9cda22cSBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1740b9cda22cSBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1741b9cda22cSBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1742b9cda22cSBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1743b9cda22cSBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1744b9cda22cSBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1745b9cda22cSBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1746b9cda22cSBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1747b9cda22cSBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1748b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17499371c9d4SSatish Balay       idx++;
17509371c9d4SSatish Balay       v++;
1751b9cda22cSBarry Smith     }
1752b9cda22cSBarry Smith   }
17539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1756b9cda22cSBarry Smith   PetscFunctionReturn(0);
1757b9cda22cSBarry Smith }
1758b9cda22cSBarry Smith 
17592f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
17609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1761dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1762dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1763d2840e09SBarry Smith   const PetscScalar *x, *v;
1764d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1765d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1766d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1767dbdd7285SBarry Smith 
1768dbdd7285SBarry Smith   PetscFunctionBegin;
17699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1771dbdd7285SBarry Smith   idx = a->j;
1772dbdd7285SBarry Smith   v   = a->a;
1773dbdd7285SBarry Smith   ii  = a->i;
1774dbdd7285SBarry Smith 
1775dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1776dbdd7285SBarry Smith     jrow  = ii[i];
1777dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1778dbdd7285SBarry Smith     sum1  = 0.0;
1779dbdd7285SBarry Smith     sum2  = 0.0;
1780dbdd7285SBarry Smith     sum3  = 0.0;
1781dbdd7285SBarry Smith     sum4  = 0.0;
1782dbdd7285SBarry Smith     sum5  = 0.0;
1783dbdd7285SBarry Smith     sum6  = 0.0;
1784dbdd7285SBarry Smith     sum7  = 0.0;
1785dbdd7285SBarry Smith     sum8  = 0.0;
1786dbdd7285SBarry Smith     sum9  = 0.0;
1787dbdd7285SBarry Smith     sum10 = 0.0;
1788dbdd7285SBarry Smith     sum11 = 0.0;
178926fbe8dcSKarl Rupp 
1790dbdd7285SBarry Smith     nonzerorow += (n > 0);
1791dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1792dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1793dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1794dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1795dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1796dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1797dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1798dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1799dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1800dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1801dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1802dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1803dbdd7285SBarry Smith       jrow++;
1804dbdd7285SBarry Smith     }
1805dbdd7285SBarry Smith     y[11 * i]      = sum1;
1806dbdd7285SBarry Smith     y[11 * i + 1]  = sum2;
1807dbdd7285SBarry Smith     y[11 * i + 2]  = sum3;
1808dbdd7285SBarry Smith     y[11 * i + 3]  = sum4;
1809dbdd7285SBarry Smith     y[11 * i + 4]  = sum5;
1810dbdd7285SBarry Smith     y[11 * i + 5]  = sum6;
1811dbdd7285SBarry Smith     y[11 * i + 6]  = sum7;
1812dbdd7285SBarry Smith     y[11 * i + 7]  = sum8;
1813dbdd7285SBarry Smith     y[11 * i + 8]  = sum9;
1814dbdd7285SBarry Smith     y[11 * i + 9]  = sum10;
1815dbdd7285SBarry Smith     y[11 * i + 10] = sum11;
1816dbdd7285SBarry Smith   }
1817dbdd7285SBarry Smith 
18189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
18199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1821dbdd7285SBarry Smith   PetscFunctionReturn(0);
1822dbdd7285SBarry Smith }
1823dbdd7285SBarry Smith 
18249371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1825dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1826dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1827d2840e09SBarry Smith   const PetscScalar *x, *v;
1828d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1829d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1830dbdd7285SBarry Smith   PetscInt           n, i, jrow, j;
1831dbdd7285SBarry Smith 
1832dbdd7285SBarry Smith   PetscFunctionBegin;
18339566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
18349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1836dbdd7285SBarry Smith   idx = a->j;
1837dbdd7285SBarry Smith   v   = a->a;
1838dbdd7285SBarry Smith   ii  = a->i;
1839dbdd7285SBarry Smith 
1840dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1841dbdd7285SBarry Smith     jrow  = ii[i];
1842dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1843dbdd7285SBarry Smith     sum1  = 0.0;
1844dbdd7285SBarry Smith     sum2  = 0.0;
1845dbdd7285SBarry Smith     sum3  = 0.0;
1846dbdd7285SBarry Smith     sum4  = 0.0;
1847dbdd7285SBarry Smith     sum5  = 0.0;
1848dbdd7285SBarry Smith     sum6  = 0.0;
1849dbdd7285SBarry Smith     sum7  = 0.0;
1850dbdd7285SBarry Smith     sum8  = 0.0;
1851dbdd7285SBarry Smith     sum9  = 0.0;
1852dbdd7285SBarry Smith     sum10 = 0.0;
1853dbdd7285SBarry Smith     sum11 = 0.0;
1854dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1855dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1856dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1857dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1858dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1859dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1860dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1861dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1862dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1863dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1864dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1865dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1866dbdd7285SBarry Smith       jrow++;
1867dbdd7285SBarry Smith     }
1868dbdd7285SBarry Smith     y[11 * i] += sum1;
1869dbdd7285SBarry Smith     y[11 * i + 1] += sum2;
1870dbdd7285SBarry Smith     y[11 * i + 2] += sum3;
1871dbdd7285SBarry Smith     y[11 * i + 3] += sum4;
1872dbdd7285SBarry Smith     y[11 * i + 4] += sum5;
1873dbdd7285SBarry Smith     y[11 * i + 5] += sum6;
1874dbdd7285SBarry Smith     y[11 * i + 6] += sum7;
1875dbdd7285SBarry Smith     y[11 * i + 7] += sum8;
1876dbdd7285SBarry Smith     y[11 * i + 8] += sum9;
1877dbdd7285SBarry Smith     y[11 * i + 9] += sum10;
1878dbdd7285SBarry Smith     y[11 * i + 10] += sum11;
1879dbdd7285SBarry Smith   }
1880dbdd7285SBarry Smith 
18819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
18829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1884dbdd7285SBarry Smith   PetscFunctionReturn(0);
1885dbdd7285SBarry Smith }
1886dbdd7285SBarry Smith 
18879371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1888dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1889dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1890d2840e09SBarry Smith   const PetscScalar *x, *v;
1891d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1892d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1893d2840e09SBarry Smith   PetscInt           n, i;
1894dbdd7285SBarry Smith 
1895dbdd7285SBarry Smith   PetscFunctionBegin;
18969566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
18979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1899dbdd7285SBarry Smith 
1900dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1901dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1902dbdd7285SBarry Smith     v       = a->a + a->i[i];
1903dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1904dbdd7285SBarry Smith     alpha1  = x[11 * i];
1905dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1906dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1907dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1908dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1909dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1910dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1911dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1912dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1913dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1914dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1915dbdd7285SBarry Smith     while (n-- > 0) {
1916dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1917dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1918dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1919dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1920dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1921dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1922dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1923dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1924dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1925dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1926dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19279371c9d4SSatish Balay       idx++;
19289371c9d4SSatish Balay       v++;
1929dbdd7285SBarry Smith     }
1930dbdd7285SBarry Smith   }
19319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1934dbdd7285SBarry Smith   PetscFunctionReturn(0);
1935dbdd7285SBarry Smith }
1936dbdd7285SBarry Smith 
19379371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1938dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1939dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1940d2840e09SBarry Smith   const PetscScalar *x, *v;
1941d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1942d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1943d2840e09SBarry Smith   PetscInt           n, i;
1944dbdd7285SBarry Smith 
1945dbdd7285SBarry Smith   PetscFunctionBegin;
19469566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
19479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1949dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1950dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1951dbdd7285SBarry Smith     v       = a->a + a->i[i];
1952dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1953dbdd7285SBarry Smith     alpha1  = x[11 * i];
1954dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1955dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1956dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1957dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1958dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1959dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1960dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1961dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1962dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1963dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1964dbdd7285SBarry Smith     while (n-- > 0) {
1965dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1966dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1967dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1968dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1969dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1970dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1971dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1972dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1973dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1974dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1975dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19769371c9d4SSatish Balay       idx++;
19779371c9d4SSatish Balay       v++;
1978dbdd7285SBarry Smith     }
1979dbdd7285SBarry Smith   }
19809566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1983dbdd7285SBarry Smith   PetscFunctionReturn(0);
1984dbdd7285SBarry Smith }
1985dbdd7285SBarry Smith 
1986dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
19879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
19882f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
19892f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1990d2840e09SBarry Smith   const PetscScalar *x, *v;
1991d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
19922f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1993d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1994d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
19952f7816d4SBarry Smith 
19962f7816d4SBarry Smith   PetscFunctionBegin;
19979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
19992f7816d4SBarry Smith   idx = a->j;
20002f7816d4SBarry Smith   v   = a->a;
20012f7816d4SBarry Smith   ii  = a->i;
20022f7816d4SBarry Smith 
20032f7816d4SBarry Smith   for (i = 0; i < m; i++) {
20042f7816d4SBarry Smith     jrow  = ii[i];
20052f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
20062f7816d4SBarry Smith     sum1  = 0.0;
20072f7816d4SBarry Smith     sum2  = 0.0;
20082f7816d4SBarry Smith     sum3  = 0.0;
20092f7816d4SBarry Smith     sum4  = 0.0;
20102f7816d4SBarry Smith     sum5  = 0.0;
20112f7816d4SBarry Smith     sum6  = 0.0;
20122f7816d4SBarry Smith     sum7  = 0.0;
20132f7816d4SBarry Smith     sum8  = 0.0;
20142f7816d4SBarry Smith     sum9  = 0.0;
20152f7816d4SBarry Smith     sum10 = 0.0;
20162f7816d4SBarry Smith     sum11 = 0.0;
20172f7816d4SBarry Smith     sum12 = 0.0;
20182f7816d4SBarry Smith     sum13 = 0.0;
20192f7816d4SBarry Smith     sum14 = 0.0;
20202f7816d4SBarry Smith     sum15 = 0.0;
20212f7816d4SBarry Smith     sum16 = 0.0;
202226fbe8dcSKarl Rupp 
2023b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
20242f7816d4SBarry Smith     for (j = 0; j < n; j++) {
20252f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
20262f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
20272f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
20282f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
20292f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
20302f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
20312f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
20322f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
20332f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
20342f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
20352f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
20362f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
20372f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
20382f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
20392f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
20402f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
20412f7816d4SBarry Smith       jrow++;
20422f7816d4SBarry Smith     }
20432f7816d4SBarry Smith     y[16 * i]      = sum1;
20442f7816d4SBarry Smith     y[16 * i + 1]  = sum2;
20452f7816d4SBarry Smith     y[16 * i + 2]  = sum3;
20462f7816d4SBarry Smith     y[16 * i + 3]  = sum4;
20472f7816d4SBarry Smith     y[16 * i + 4]  = sum5;
20482f7816d4SBarry Smith     y[16 * i + 5]  = sum6;
20492f7816d4SBarry Smith     y[16 * i + 6]  = sum7;
20502f7816d4SBarry Smith     y[16 * i + 7]  = sum8;
20512f7816d4SBarry Smith     y[16 * i + 8]  = sum9;
20522f7816d4SBarry Smith     y[16 * i + 9]  = sum10;
20532f7816d4SBarry Smith     y[16 * i + 10] = sum11;
20542f7816d4SBarry Smith     y[16 * i + 11] = sum12;
20552f7816d4SBarry Smith     y[16 * i + 12] = sum13;
20562f7816d4SBarry Smith     y[16 * i + 13] = sum14;
20572f7816d4SBarry Smith     y[16 * i + 14] = sum15;
20582f7816d4SBarry Smith     y[16 * i + 15] = sum16;
20592f7816d4SBarry Smith   }
20602f7816d4SBarry Smith 
20619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
20629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
20639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
20642f7816d4SBarry Smith   PetscFunctionReturn(0);
20652f7816d4SBarry Smith }
20662f7816d4SBarry Smith 
20679371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
20682f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
20692f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2070d2840e09SBarry Smith   const PetscScalar *x, *v;
2071d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
20722f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2073d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2074d2840e09SBarry Smith   PetscInt           n, i;
20752f7816d4SBarry Smith 
20762f7816d4SBarry Smith   PetscFunctionBegin;
20779566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
20789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
20799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2080bfec09a0SHong Zhang 
20812f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2082bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2083bfec09a0SHong Zhang     v       = a->a + a->i[i];
20842f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
20852f7816d4SBarry Smith     alpha1  = x[16 * i];
20862f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
20872f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
20882f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
20892f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
20902f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
20912f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
20922f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
20932f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
20942f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
20952f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
20962f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
20972f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
20982f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
20992f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
21002f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
21012f7816d4SBarry Smith     while (n-- > 0) {
21022f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
21032f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
21042f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
21052f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
21062f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
21072f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
21082f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
21092f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
21102f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
21112f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
21122f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
21132f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
21142f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
21152f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
21162f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
21172f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
21189371c9d4SSatish Balay       idx++;
21199371c9d4SSatish Balay       v++;
21202f7816d4SBarry Smith     }
21212f7816d4SBarry Smith   }
21229566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
21239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
21249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21252f7816d4SBarry Smith   PetscFunctionReturn(0);
21262f7816d4SBarry Smith }
21272f7816d4SBarry Smith 
21289371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
21292f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
21302f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2131d2840e09SBarry Smith   const PetscScalar *x, *v;
2132d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21332f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2134d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2135b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
21362f7816d4SBarry Smith 
21372f7816d4SBarry Smith   PetscFunctionBegin;
21389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
21399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
21412f7816d4SBarry Smith   idx = a->j;
21422f7816d4SBarry Smith   v   = a->a;
21432f7816d4SBarry Smith   ii  = a->i;
21442f7816d4SBarry Smith 
21452f7816d4SBarry Smith   for (i = 0; i < m; i++) {
21462f7816d4SBarry Smith     jrow  = ii[i];
21472f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
21482f7816d4SBarry Smith     sum1  = 0.0;
21492f7816d4SBarry Smith     sum2  = 0.0;
21502f7816d4SBarry Smith     sum3  = 0.0;
21512f7816d4SBarry Smith     sum4  = 0.0;
21522f7816d4SBarry Smith     sum5  = 0.0;
21532f7816d4SBarry Smith     sum6  = 0.0;
21542f7816d4SBarry Smith     sum7  = 0.0;
21552f7816d4SBarry Smith     sum8  = 0.0;
21562f7816d4SBarry Smith     sum9  = 0.0;
21572f7816d4SBarry Smith     sum10 = 0.0;
21582f7816d4SBarry Smith     sum11 = 0.0;
21592f7816d4SBarry Smith     sum12 = 0.0;
21602f7816d4SBarry Smith     sum13 = 0.0;
21612f7816d4SBarry Smith     sum14 = 0.0;
21622f7816d4SBarry Smith     sum15 = 0.0;
21632f7816d4SBarry Smith     sum16 = 0.0;
21642f7816d4SBarry Smith     for (j = 0; j < n; j++) {
21652f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
21662f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
21672f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
21682f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
21692f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
21702f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
21712f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
21722f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
21732f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
21742f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
21752f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
21762f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
21772f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
21782f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
21792f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
21802f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
21812f7816d4SBarry Smith       jrow++;
21822f7816d4SBarry Smith     }
21832f7816d4SBarry Smith     y[16 * i] += sum1;
21842f7816d4SBarry Smith     y[16 * i + 1] += sum2;
21852f7816d4SBarry Smith     y[16 * i + 2] += sum3;
21862f7816d4SBarry Smith     y[16 * i + 3] += sum4;
21872f7816d4SBarry Smith     y[16 * i + 4] += sum5;
21882f7816d4SBarry Smith     y[16 * i + 5] += sum6;
21892f7816d4SBarry Smith     y[16 * i + 6] += sum7;
21902f7816d4SBarry Smith     y[16 * i + 7] += sum8;
21912f7816d4SBarry Smith     y[16 * i + 8] += sum9;
21922f7816d4SBarry Smith     y[16 * i + 9] += sum10;
21932f7816d4SBarry Smith     y[16 * i + 10] += sum11;
21942f7816d4SBarry Smith     y[16 * i + 11] += sum12;
21952f7816d4SBarry Smith     y[16 * i + 12] += sum13;
21962f7816d4SBarry Smith     y[16 * i + 13] += sum14;
21972f7816d4SBarry Smith     y[16 * i + 14] += sum15;
21982f7816d4SBarry Smith     y[16 * i + 15] += sum16;
21992f7816d4SBarry Smith   }
22002f7816d4SBarry Smith 
22019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
22042f7816d4SBarry Smith   PetscFunctionReturn(0);
22052f7816d4SBarry Smith }
22062f7816d4SBarry Smith 
22079371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
22082f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
22092f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2210d2840e09SBarry Smith   const PetscScalar *x, *v;
2211d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
22122f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2213d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2214d2840e09SBarry Smith   PetscInt           n, i;
22152f7816d4SBarry Smith 
22162f7816d4SBarry Smith   PetscFunctionBegin;
22179566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
22189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
22202f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2221bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2222bfec09a0SHong Zhang     v       = a->a + a->i[i];
22232f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
22242f7816d4SBarry Smith     alpha1  = x[16 * i];
22252f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
22262f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
22272f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
22282f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
22292f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
22302f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
22312f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
22322f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
22332f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
22342f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
22352f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
22362f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
22372f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
22382f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
22392f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
22402f7816d4SBarry Smith     while (n-- > 0) {
22412f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
22422f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
22432f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
22442f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
22452f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
22462f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
22472f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
22482f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
22492f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
22502f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
22512f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
22522f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
22532f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
22542f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
22552f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
22562f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
22579371c9d4SSatish Balay       idx++;
22589371c9d4SSatish Balay       v++;
22592f7816d4SBarry Smith     }
22602f7816d4SBarry Smith   }
22619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
226466ed3db0SBarry Smith   PetscFunctionReturn(0);
226566ed3db0SBarry Smith }
226666ed3db0SBarry Smith 
2267ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
22689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2269ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2270ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2271d2840e09SBarry Smith   const PetscScalar *x, *v;
2272d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2273ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2274d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2275d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
2276ed1c418cSBarry Smith 
2277ed1c418cSBarry Smith   PetscFunctionBegin;
22789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2280ed1c418cSBarry Smith   idx = a->j;
2281ed1c418cSBarry Smith   v   = a->a;
2282ed1c418cSBarry Smith   ii  = a->i;
2283ed1c418cSBarry Smith 
2284ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2285ed1c418cSBarry Smith     jrow  = ii[i];
2286ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2287ed1c418cSBarry Smith     sum1  = 0.0;
2288ed1c418cSBarry Smith     sum2  = 0.0;
2289ed1c418cSBarry Smith     sum3  = 0.0;
2290ed1c418cSBarry Smith     sum4  = 0.0;
2291ed1c418cSBarry Smith     sum5  = 0.0;
2292ed1c418cSBarry Smith     sum6  = 0.0;
2293ed1c418cSBarry Smith     sum7  = 0.0;
2294ed1c418cSBarry Smith     sum8  = 0.0;
2295ed1c418cSBarry Smith     sum9  = 0.0;
2296ed1c418cSBarry Smith     sum10 = 0.0;
2297ed1c418cSBarry Smith     sum11 = 0.0;
2298ed1c418cSBarry Smith     sum12 = 0.0;
2299ed1c418cSBarry Smith     sum13 = 0.0;
2300ed1c418cSBarry Smith     sum14 = 0.0;
2301ed1c418cSBarry Smith     sum15 = 0.0;
2302ed1c418cSBarry Smith     sum16 = 0.0;
2303ed1c418cSBarry Smith     sum17 = 0.0;
2304ed1c418cSBarry Smith     sum18 = 0.0;
230526fbe8dcSKarl Rupp 
2306ed1c418cSBarry Smith     nonzerorow += (n > 0);
2307ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2308ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2309ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2310ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2311ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2312ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2313ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2314ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2315ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2316ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2317ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2318ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2319ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2320ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2321ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2322ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2323ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2324ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2325ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2326ed1c418cSBarry Smith       jrow++;
2327ed1c418cSBarry Smith     }
2328ed1c418cSBarry Smith     y[18 * i]      = sum1;
2329ed1c418cSBarry Smith     y[18 * i + 1]  = sum2;
2330ed1c418cSBarry Smith     y[18 * i + 2]  = sum3;
2331ed1c418cSBarry Smith     y[18 * i + 3]  = sum4;
2332ed1c418cSBarry Smith     y[18 * i + 4]  = sum5;
2333ed1c418cSBarry Smith     y[18 * i + 5]  = sum6;
2334ed1c418cSBarry Smith     y[18 * i + 6]  = sum7;
2335ed1c418cSBarry Smith     y[18 * i + 7]  = sum8;
2336ed1c418cSBarry Smith     y[18 * i + 8]  = sum9;
2337ed1c418cSBarry Smith     y[18 * i + 9]  = sum10;
2338ed1c418cSBarry Smith     y[18 * i + 10] = sum11;
2339ed1c418cSBarry Smith     y[18 * i + 11] = sum12;
2340ed1c418cSBarry Smith     y[18 * i + 12] = sum13;
2341ed1c418cSBarry Smith     y[18 * i + 13] = sum14;
2342ed1c418cSBarry Smith     y[18 * i + 14] = sum15;
2343ed1c418cSBarry Smith     y[18 * i + 15] = sum16;
2344ed1c418cSBarry Smith     y[18 * i + 16] = sum17;
2345ed1c418cSBarry Smith     y[18 * i + 17] = sum18;
2346ed1c418cSBarry Smith   }
2347ed1c418cSBarry Smith 
23489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
23499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
23509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2351ed1c418cSBarry Smith   PetscFunctionReturn(0);
2352ed1c418cSBarry Smith }
2353ed1c418cSBarry Smith 
23549371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2355ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2356ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2357d2840e09SBarry Smith   const PetscScalar *x, *v;
2358d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2359ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2360d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2361d2840e09SBarry Smith   PetscInt           n, i;
2362ed1c418cSBarry Smith 
2363ed1c418cSBarry Smith   PetscFunctionBegin;
23649566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
23659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
23669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2367ed1c418cSBarry Smith 
2368ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2369ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2370ed1c418cSBarry Smith     v       = a->a + a->i[i];
2371ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2372ed1c418cSBarry Smith     alpha1  = x[18 * i];
2373ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2374ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2375ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2376ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2377ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2378ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2379ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2380ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2381ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2382ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2383ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2384ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2385ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2386ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2387ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2388ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2389ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2390ed1c418cSBarry Smith     while (n-- > 0) {
2391ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2392ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2393ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2394ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2395ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2396ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2397ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2398ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2399ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2400ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2401ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2402ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2403ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2404ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2405ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2406ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2407ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2408ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
24099371c9d4SSatish Balay       idx++;
24109371c9d4SSatish Balay       v++;
2411ed1c418cSBarry Smith     }
2412ed1c418cSBarry Smith   }
24139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2416ed1c418cSBarry Smith   PetscFunctionReturn(0);
2417ed1c418cSBarry Smith }
2418ed1c418cSBarry Smith 
24199371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2420ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2421ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2422d2840e09SBarry Smith   const PetscScalar *x, *v;
2423d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2424ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2425d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2426ed1c418cSBarry Smith   PetscInt           n, i, jrow, j;
2427ed1c418cSBarry Smith 
2428ed1c418cSBarry Smith   PetscFunctionBegin;
24299566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
24309566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
24319566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2432ed1c418cSBarry Smith   idx = a->j;
2433ed1c418cSBarry Smith   v   = a->a;
2434ed1c418cSBarry Smith   ii  = a->i;
2435ed1c418cSBarry Smith 
2436ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2437ed1c418cSBarry Smith     jrow  = ii[i];
2438ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2439ed1c418cSBarry Smith     sum1  = 0.0;
2440ed1c418cSBarry Smith     sum2  = 0.0;
2441ed1c418cSBarry Smith     sum3  = 0.0;
2442ed1c418cSBarry Smith     sum4  = 0.0;
2443ed1c418cSBarry Smith     sum5  = 0.0;
2444ed1c418cSBarry Smith     sum6  = 0.0;
2445ed1c418cSBarry Smith     sum7  = 0.0;
2446ed1c418cSBarry Smith     sum8  = 0.0;
2447ed1c418cSBarry Smith     sum9  = 0.0;
2448ed1c418cSBarry Smith     sum10 = 0.0;
2449ed1c418cSBarry Smith     sum11 = 0.0;
2450ed1c418cSBarry Smith     sum12 = 0.0;
2451ed1c418cSBarry Smith     sum13 = 0.0;
2452ed1c418cSBarry Smith     sum14 = 0.0;
2453ed1c418cSBarry Smith     sum15 = 0.0;
2454ed1c418cSBarry Smith     sum16 = 0.0;
2455ed1c418cSBarry Smith     sum17 = 0.0;
2456ed1c418cSBarry Smith     sum18 = 0.0;
2457ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2458ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2459ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2460ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2461ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2462ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2463ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2464ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2465ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2466ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2467ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2468ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2469ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2470ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2471ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2472ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2473ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2474ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2475ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2476ed1c418cSBarry Smith       jrow++;
2477ed1c418cSBarry Smith     }
2478ed1c418cSBarry Smith     y[18 * i] += sum1;
2479ed1c418cSBarry Smith     y[18 * i + 1] += sum2;
2480ed1c418cSBarry Smith     y[18 * i + 2] += sum3;
2481ed1c418cSBarry Smith     y[18 * i + 3] += sum4;
2482ed1c418cSBarry Smith     y[18 * i + 4] += sum5;
2483ed1c418cSBarry Smith     y[18 * i + 5] += sum6;
2484ed1c418cSBarry Smith     y[18 * i + 6] += sum7;
2485ed1c418cSBarry Smith     y[18 * i + 7] += sum8;
2486ed1c418cSBarry Smith     y[18 * i + 8] += sum9;
2487ed1c418cSBarry Smith     y[18 * i + 9] += sum10;
2488ed1c418cSBarry Smith     y[18 * i + 10] += sum11;
2489ed1c418cSBarry Smith     y[18 * i + 11] += sum12;
2490ed1c418cSBarry Smith     y[18 * i + 12] += sum13;
2491ed1c418cSBarry Smith     y[18 * i + 13] += sum14;
2492ed1c418cSBarry Smith     y[18 * i + 14] += sum15;
2493ed1c418cSBarry Smith     y[18 * i + 15] += sum16;
2494ed1c418cSBarry Smith     y[18 * i + 16] += sum17;
2495ed1c418cSBarry Smith     y[18 * i + 17] += sum18;
2496ed1c418cSBarry Smith   }
2497ed1c418cSBarry Smith 
24989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2501ed1c418cSBarry Smith   PetscFunctionReturn(0);
2502ed1c418cSBarry Smith }
2503ed1c418cSBarry Smith 
25049371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2505ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2506ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2507d2840e09SBarry Smith   const PetscScalar *x, *v;
2508d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2509ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2510d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2511d2840e09SBarry Smith   PetscInt           n, i;
2512ed1c418cSBarry Smith 
2513ed1c418cSBarry Smith   PetscFunctionBegin;
25149566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
25159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2517ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2518ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2519ed1c418cSBarry Smith     v       = a->a + a->i[i];
2520ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2521ed1c418cSBarry Smith     alpha1  = x[18 * i];
2522ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2523ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2524ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2525ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2526ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2527ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2528ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2529ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2530ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2531ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2532ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2533ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2534ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2535ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2536ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2537ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2538ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2539ed1c418cSBarry Smith     while (n-- > 0) {
2540ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2541ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2542ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2543ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2544ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2545ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2546ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2547ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2548ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2549ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2550ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2551ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2552ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2553ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2554ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2555ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2556ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2557ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
25589371c9d4SSatish Balay       idx++;
25599371c9d4SSatish Balay       v++;
2560ed1c418cSBarry Smith     }
2561ed1c418cSBarry Smith   }
25629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
25639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2565ed1c418cSBarry Smith   PetscFunctionReturn(0);
2566ed1c418cSBarry Smith }
2567ed1c418cSBarry Smith 
25689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
25696861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
25706861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
25716861f193SBarry Smith   const PetscScalar *x, *v;
25726861f193SBarry Smith   PetscScalar       *y, *sums;
25736861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
25746861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
25756861f193SBarry Smith 
25766861f193SBarry Smith   PetscFunctionBegin;
25779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25789566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
25799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
25806861f193SBarry Smith   idx = a->j;
25816861f193SBarry Smith   v   = a->a;
25826861f193SBarry Smith   ii  = a->i;
25836861f193SBarry Smith 
25846861f193SBarry Smith   for (i = 0; i < m; i++) {
25856861f193SBarry Smith     jrow = ii[i];
25866861f193SBarry Smith     n    = ii[i + 1] - jrow;
25876861f193SBarry Smith     sums = y + dof * i;
25886861f193SBarry Smith     for (j = 0; j < n; j++) {
2589ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
25906861f193SBarry Smith       jrow++;
25916861f193SBarry Smith     }
25926861f193SBarry Smith   }
25936861f193SBarry Smith 
25949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
25959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
25976861f193SBarry Smith   PetscFunctionReturn(0);
25986861f193SBarry Smith }
25996861f193SBarry Smith 
26009371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26016861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26026861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26036861f193SBarry Smith   const PetscScalar *x, *v;
26046861f193SBarry Smith   PetscScalar       *y, *sums;
26056861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26066861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
26076861f193SBarry Smith 
26086861f193SBarry Smith   PetscFunctionBegin;
26099566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26126861f193SBarry Smith   idx = a->j;
26136861f193SBarry Smith   v   = a->a;
26146861f193SBarry Smith   ii  = a->i;
26156861f193SBarry Smith 
26166861f193SBarry Smith   for (i = 0; i < m; i++) {
26176861f193SBarry Smith     jrow = ii[i];
26186861f193SBarry Smith     n    = ii[i + 1] - jrow;
26196861f193SBarry Smith     sums = y + dof * i;
26206861f193SBarry Smith     for (j = 0; j < n; j++) {
2621ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
26226861f193SBarry Smith       jrow++;
26236861f193SBarry Smith     }
26246861f193SBarry Smith   }
26256861f193SBarry Smith 
26269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26296861f193SBarry Smith   PetscFunctionReturn(0);
26306861f193SBarry Smith }
26316861f193SBarry Smith 
26329371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
26336861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26346861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26356861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26366861f193SBarry Smith   PetscScalar       *y;
26376861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26386861f193SBarry Smith   PetscInt           n, i, k;
26396861f193SBarry Smith 
26406861f193SBarry Smith   PetscFunctionBegin;
26419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26429566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
26439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
26446861f193SBarry Smith   for (i = 0; i < m; i++) {
26456861f193SBarry Smith     idx   = a->j + a->i[i];
26466861f193SBarry Smith     v     = a->a + a->i[i];
26476861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26486861f193SBarry Smith     alpha = x + dof * i;
26496861f193SBarry Smith     while (n-- > 0) {
2650ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
26519371c9d4SSatish Balay       idx++;
26529371c9d4SSatish Balay       v++;
26536861f193SBarry Smith     }
26546861f193SBarry Smith   }
26559566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
26586861f193SBarry Smith   PetscFunctionReturn(0);
26596861f193SBarry Smith }
26606861f193SBarry Smith 
26619371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26626861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26636861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26646861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26656861f193SBarry Smith   PetscScalar       *y;
26666861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26676861f193SBarry Smith   PetscInt           n, i, k;
26686861f193SBarry Smith 
26696861f193SBarry Smith   PetscFunctionBegin;
26709566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26719566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26736861f193SBarry Smith   for (i = 0; i < m; i++) {
26746861f193SBarry Smith     idx   = a->j + a->i[i];
26756861f193SBarry Smith     v     = a->a + a->i[i];
26766861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26776861f193SBarry Smith     alpha = x + dof * i;
26786861f193SBarry Smith     while (n-- > 0) {
2679ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
26809371c9d4SSatish Balay       idx++;
26819371c9d4SSatish Balay       v++;
26826861f193SBarry Smith     }
26836861f193SBarry Smith   }
26849566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26876861f193SBarry Smith   PetscFunctionReturn(0);
26886861f193SBarry Smith }
26896861f193SBarry Smith 
2690f4a53059SBarry Smith /*===================================================================================*/
26919371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
26924cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2693f4a53059SBarry Smith 
2694b24ad042SBarry Smith   PetscFunctionBegin;
2695f4a53059SBarry Smith   /* start the scatter */
26969566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
26979566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
26989566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
26999566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2700f4a53059SBarry Smith   PetscFunctionReturn(0);
2701f4a53059SBarry Smith }
2702f4a53059SBarry Smith 
27039371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
27044cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2705b24ad042SBarry Smith 
27064cb9d9b8SBarry Smith   PetscFunctionBegin;
27079566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27089566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
27099566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27109566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27114cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27124cb9d9b8SBarry Smith }
27134cb9d9b8SBarry Smith 
27149371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27154cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
27164cb9d9b8SBarry Smith 
2717b24ad042SBarry Smith   PetscFunctionBegin;
27184cb9d9b8SBarry Smith   /* start the scatter */
27199566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27209566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
27219566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27229566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
27234cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27244cb9d9b8SBarry Smith }
27254cb9d9b8SBarry Smith 
27269371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27274cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2728b24ad042SBarry Smith 
27294cb9d9b8SBarry Smith   PetscFunctionBegin;
27309566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27319566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
27329566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27339566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27344cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27354cb9d9b8SBarry Smith }
27364cb9d9b8SBarry Smith 
273795ddefa2SHong Zhang /* ----------------------------------------------------------------*/
27389371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) {
27394222ddf1SHong Zhang   Mat_Product *product = C->product;
27404222ddf1SHong Zhang 
27414222ddf1SHong Zhang   PetscFunctionBegin;
27424222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
27434222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
274498921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
27454222ddf1SHong Zhang   PetscFunctionReturn(0);
27464222ddf1SHong Zhang }
27474222ddf1SHong Zhang 
27489371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) {
27494222ddf1SHong Zhang   Mat_Product *product = C->product;
27504222ddf1SHong Zhang   PetscBool    flg     = PETSC_FALSE;
27514222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
27524222ddf1SHong Zhang   PetscInt     alg = 1; /* set default algorithm */
27534222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
27544222ddf1SHong Zhang   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
27554222ddf1SHong Zhang   PetscInt    nalg        = 4;
27564222ddf1SHong Zhang #else
27574222ddf1SHong Zhang   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
27584222ddf1SHong Zhang   PetscInt    nalg        = 5;
27594222ddf1SHong Zhang #endif
27604222ddf1SHong Zhang 
27614222ddf1SHong Zhang   PetscFunctionBegin;
276208401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
27634222ddf1SHong Zhang 
27644222ddf1SHong Zhang   /* PtAP */
27654222ddf1SHong Zhang   /* Check matrix local sizes */
27669371c9d4SSatish Balay   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27679371c9d4SSatish Balay              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
27689371c9d4SSatish Balay   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27699371c9d4SSatish Balay              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
27704222ddf1SHong Zhang 
27714222ddf1SHong Zhang   /* Set the default algorithm */
27729566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
277348a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
27744222ddf1SHong Zhang 
27754222ddf1SHong Zhang   /* Get runtime option */
2776d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
27779566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
277848a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2779d0609cedSBarry Smith   PetscOptionsEnd();
27804222ddf1SHong Zhang 
27819566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
27824222ddf1SHong Zhang   if (flg) {
27834222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
27844222ddf1SHong Zhang     PetscFunctionReturn(0);
27854222ddf1SHong Zhang   }
27864222ddf1SHong Zhang 
27879566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
27884222ddf1SHong Zhang   if (flg) {
27894222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
27904222ddf1SHong Zhang     PetscFunctionReturn(0);
27914222ddf1SHong Zhang   }
27924222ddf1SHong Zhang 
27934222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
27949566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
27959566063dSJacob Faibussowitsch   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
27969566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
27974222ddf1SHong Zhang   PetscFunctionReturn(0);
27984222ddf1SHong Zhang }
27994222ddf1SHong Zhang 
28004222ddf1SHong Zhang /* ----------------------------------------------------------------*/
28019371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) {
28020298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
28037ba1a0bfSKris Buschelman   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
28047ba1a0bfSKris Buschelman   Mat                P  = pp->AIJ;
28057ba1a0bfSKris Buschelman   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2806d2840e09SBarry Smith   PetscInt          *pti, *ptj, *ptJ;
28077ba1a0bfSKris Buschelman   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2808d2840e09SBarry Smith   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2809d2840e09SBarry Smith   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
28107ba1a0bfSKris Buschelman   MatScalar         *ca;
2811d2840e09SBarry Smith   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
28127ba1a0bfSKris Buschelman 
28137ba1a0bfSKris Buschelman   PetscFunctionBegin;
28147ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28159566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
28167ba1a0bfSKris Buschelman 
28177ba1a0bfSKris Buschelman   cn = pn * ppdof;
28187ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28197ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn + 1, &ci));
28217ba1a0bfSKris Buschelman   ci[0] = 0;
28227ba1a0bfSKris Buschelman 
28237ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
28259566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow, an));
28269566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow, cn));
28277ba1a0bfSKris Buschelman 
28287ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28297ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28307ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28319566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
28327ba1a0bfSKris Buschelman   current_space = free_space;
28337ba1a0bfSKris Buschelman 
28347ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
28357ba1a0bfSKris Buschelman   for (i = 0; i < pn; i++) {
28367ba1a0bfSKris Buschelman     ptnzi = pti[i + 1] - pti[i];
28377ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
28387ba1a0bfSKris Buschelman     for (dof = 0; dof < ppdof; dof++) {
28397ba1a0bfSKris Buschelman       ptanzi = 0;
28407ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
28417ba1a0bfSKris Buschelman       for (j = 0; j < ptnzi; j++) {
28427ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
28437ba1a0bfSKris Buschelman         arow = ptJ[j] * ppdof + dof;
28447ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
28457ba1a0bfSKris Buschelman         anzj = ai[arow + 1] - ai[arow];
28467ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
28477ba1a0bfSKris Buschelman         for (k = 0; k < anzj; k++) {
28487ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
28497ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
28507ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
28517ba1a0bfSKris Buschelman           }
28527ba1a0bfSKris Buschelman         }
28537ba1a0bfSKris Buschelman       }
28547ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
28557ba1a0bfSKris Buschelman       ptaj = ptasparserow;
28567ba1a0bfSKris Buschelman       cnzi = 0;
28577ba1a0bfSKris Buschelman       for (j = 0; j < ptanzi; j++) {
28587ba1a0bfSKris Buschelman         /* Get offset within block of P */
28597ba1a0bfSKris Buschelman         pshift = *ptaj % ppdof;
28607ba1a0bfSKris Buschelman         /* Get block row of P */
28617ba1a0bfSKris Buschelman         prow   = (*ptaj++) / ppdof; /* integer division */
28627ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
28637ba1a0bfSKris Buschelman         pnzj   = pi[prow + 1] - pi[prow];
28647ba1a0bfSKris Buschelman         pjj    = pj + pi[prow];
28657ba1a0bfSKris Buschelman         for (k = 0; k < pnzj; k++) {
28667ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
28677ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
28687ba1a0bfSKris Buschelman           if (!denserow[pjj[k] * ppdof + pshift]) {
28697ba1a0bfSKris Buschelman             denserow[pjj[k] * ppdof + pshift] = -1;
28707ba1a0bfSKris Buschelman             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
28717ba1a0bfSKris Buschelman           }
28727ba1a0bfSKris Buschelman         }
28737ba1a0bfSKris Buschelman       }
28747ba1a0bfSKris Buschelman 
28757ba1a0bfSKris Buschelman       /* sort sparserow */
28769566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi, sparserow));
28777ba1a0bfSKris Buschelman 
28787ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
28797ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
288048a46eb9SPierre Jolivet       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
28817ba1a0bfSKris Buschelman 
28827ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
28839566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
288426fbe8dcSKarl Rupp 
28857ba1a0bfSKris Buschelman       current_space->array += cnzi;
28867ba1a0bfSKris Buschelman       current_space->local_used += cnzi;
28877ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
28887ba1a0bfSKris Buschelman 
288926fbe8dcSKarl Rupp       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
289026fbe8dcSKarl Rupp       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
289126fbe8dcSKarl Rupp 
28927ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
28937ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
28947ba1a0bfSKris Buschelman       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
28957ba1a0bfSKris Buschelman     }
28967ba1a0bfSKris Buschelman   }
28977ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
28987ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
28997ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
29019566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
29029566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
29037ba1a0bfSKris Buschelman 
29047ba1a0bfSKris Buschelman   /* Allocate space for ca */
29059566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
29067ba1a0bfSKris Buschelman 
29077ba1a0bfSKris Buschelman   /* put together the new matrix */
29089566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
29099566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C, pp->dof));
29107ba1a0bfSKris Buschelman 
29117ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29127ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29134222ddf1SHong Zhang   c          = (Mat_SeqAIJ *)(C->data);
2914e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2915e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29167ba1a0bfSKris Buschelman   c->nonew   = 0;
291726fbe8dcSKarl Rupp 
29184222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29194222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29207ba1a0bfSKris Buschelman 
29217ba1a0bfSKris Buschelman   /* Clean up. */
29229566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
29237ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29247ba1a0bfSKris Buschelman }
29257ba1a0bfSKris Buschelman 
29269371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) {
29277ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29287ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
29297ba1a0bfSKris Buschelman   Mat              P  = pp->AIJ;
29307ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
29317ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
29327ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2933a2ea699eSBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2934d2840e09SBarry Smith   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2935d2840e09SBarry Smith   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2936d2840e09SBarry Smith   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2937a2ea699eSBarry Smith   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2938d2840e09SBarry Smith   MatScalar       *ca = c->a, *caj, *apa;
29397ba1a0bfSKris Buschelman 
29407ba1a0bfSKris Buschelman   PetscFunctionBegin;
29417ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
29429566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
29437ba1a0bfSKris Buschelman 
29447ba1a0bfSKris Buschelman   /* Clear old values in C */
29459566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca, ci[cm]));
29467ba1a0bfSKris Buschelman 
29477ba1a0bfSKris Buschelman   for (i = 0; i < am; i++) {
29487ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
29497ba1a0bfSKris Buschelman     anzi  = ai[i + 1] - ai[i];
29507ba1a0bfSKris Buschelman     apnzj = 0;
29517ba1a0bfSKris Buschelman     for (j = 0; j < anzi; j++) {
29527ba1a0bfSKris Buschelman       /* Get offset within block of P */
29537ba1a0bfSKris Buschelman       pshift = *aj % ppdof;
29547ba1a0bfSKris Buschelman       /* Get block row of P */
29557ba1a0bfSKris Buschelman       prow   = *aj++ / ppdof; /* integer division */
29567ba1a0bfSKris Buschelman       pnzj   = pi[prow + 1] - pi[prow];
29577ba1a0bfSKris Buschelman       pjj    = pj + pi[prow];
29587ba1a0bfSKris Buschelman       paj    = pa + pi[prow];
29597ba1a0bfSKris Buschelman       for (k = 0; k < pnzj; k++) {
29607ba1a0bfSKris Buschelman         poffset = pjj[k] * ppdof + pshift;
29617ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
29627ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
29637ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
29647ba1a0bfSKris Buschelman         }
29657ba1a0bfSKris Buschelman         apa[poffset] += (*aa) * paj[k];
29667ba1a0bfSKris Buschelman       }
29679566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * pnzj));
29687ba1a0bfSKris Buschelman       aa++;
29697ba1a0bfSKris Buschelman     }
29707ba1a0bfSKris Buschelman 
29717ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
29727ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
29739566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj, apj));
29747ba1a0bfSKris Buschelman 
29757ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
29767ba1a0bfSKris Buschelman     prow    = i / ppdof; /* integer division */
29777ba1a0bfSKris Buschelman     pshift  = i % ppdof;
29787ba1a0bfSKris Buschelman     poffset = pi[prow];
29797ba1a0bfSKris Buschelman     pnzi    = pi[prow + 1] - poffset;
29807ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
29817ba1a0bfSKris Buschelman     pJ      = pj + poffset;
29827ba1a0bfSKris Buschelman     pA      = pa + poffset;
29837ba1a0bfSKris Buschelman     for (j = 0; j < pnzi; j++) {
29847ba1a0bfSKris Buschelman       crow = (*pJ) * ppdof + pshift;
29857ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
29867ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
29877ba1a0bfSKris Buschelman       pJ++;
29887ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
29897ba1a0bfSKris Buschelman       for (k = 0, nextap = 0; nextap < apnzj; k++) {
299026fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
29917ba1a0bfSKris Buschelman       }
29929566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * apnzj));
29937ba1a0bfSKris Buschelman       pA++;
29947ba1a0bfSKris Buschelman     }
29957ba1a0bfSKris Buschelman 
29967ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
29977ba1a0bfSKris Buschelman     for (j = 0; j < apnzj; j++) {
29987ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
29997ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30007ba1a0bfSKris Buschelman     }
30017ba1a0bfSKris Buschelman   }
30027ba1a0bfSKris Buschelman 
30037ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
30059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
30069566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa, apj, apjdense));
300795ddefa2SHong Zhang   PetscFunctionReturn(0);
300895ddefa2SHong Zhang }
30097ba1a0bfSKris Buschelman 
30109371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) {
30114222ddf1SHong Zhang   Mat_Product *product = C->product;
30124222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30132121bac1SHong Zhang 
30142121bac1SHong Zhang   PetscFunctionBegin;
30159566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
30162121bac1SHong Zhang   PetscFunctionReturn(0);
30172121bac1SHong Zhang }
30182121bac1SHong Zhang 
30199371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) {
302095ddefa2SHong Zhang   PetscFunctionBegin;
30213e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
302295ddefa2SHong Zhang }
302395ddefa2SHong Zhang 
30249371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) {
302595ddefa2SHong Zhang   PetscFunctionBegin;
30263e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
30277ba1a0bfSKris Buschelman }
30285c65b9ecSFande Kong 
3029bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3030bc8e477aSFande Kong 
30319371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) {
3032bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3033bc8e477aSFande Kong 
3034bc8e477aSFande Kong   PetscFunctionBegin;
303534bcad68SFande Kong 
30369566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3037bc8e477aSFande Kong   PetscFunctionReturn(0);
3038bc8e477aSFande Kong }
3039bc8e477aSFande Kong 
30404222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3041bc8e477aSFande Kong 
30429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) {
3043bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3044bc8e477aSFande Kong 
3045bc8e477aSFande Kong   PetscFunctionBegin;
30469566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
30474222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3048bc8e477aSFande Kong   PetscFunctionReturn(0);
3049bc8e477aSFande Kong }
3050bc8e477aSFande Kong 
3051bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3052bc8e477aSFande Kong 
30539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) {
3054bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3055bc8e477aSFande Kong 
3056bc8e477aSFande Kong   PetscFunctionBegin;
305734bcad68SFande Kong 
30589566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3059bc8e477aSFande Kong   PetscFunctionReturn(0);
3060bc8e477aSFande Kong }
3061bc8e477aSFande Kong 
30624222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3063bc8e477aSFande Kong 
30649371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) {
3065bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3066bc8e477aSFande Kong 
3067bc8e477aSFande Kong   PetscFunctionBegin;
306834bcad68SFande Kong 
30699566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
30704222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3071bc8e477aSFande Kong   PetscFunctionReturn(0);
3072bc8e477aSFande Kong }
3073bc8e477aSFande Kong 
30749371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) {
30754222ddf1SHong Zhang   Mat_Product *product = C->product;
30764222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30774222ddf1SHong Zhang   PetscBool    flg;
3078bc8e477aSFande Kong 
3079bc8e477aSFande Kong   PetscFunctionBegin;
30809566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
30814222ddf1SHong Zhang   if (flg) {
30829566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
30834222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
30844222ddf1SHong Zhang     PetscFunctionReturn(0);
3085bc8e477aSFande Kong   }
308634bcad68SFande Kong 
30879566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
30884222ddf1SHong Zhang   if (flg) {
30899566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
30904222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
30914222ddf1SHong Zhang     PetscFunctionReturn(0);
30924222ddf1SHong Zhang   }
309334bcad68SFande Kong 
30944222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3095bc8e477aSFande Kong }
3096bc8e477aSFande Kong 
30979371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
30989c4fc4c7SBarry Smith   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3099ceb03754SKris Buschelman   Mat          a   = b->AIJ, B;
31009c4fc4c7SBarry Smith   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
31010fd73130SBarry Smith   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3102ba8c8a56SBarry Smith   PetscInt    *cols;
3103ba8c8a56SBarry Smith   PetscScalar *vals;
31049c4fc4c7SBarry Smith 
31059c4fc4c7SBarry Smith   PetscFunctionBegin;
31069566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a, &m, &n));
31079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof * m, &ilen));
31089c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31099c4fc4c7SBarry Smith     nmax = PetscMax(nmax, aij->ilen[i]);
311026fbe8dcSKarl Rupp     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
31119c4fc4c7SBarry Smith   }
31129566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
31139566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
31149566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31159566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
31169566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax, &icols));
31189c4fc4c7SBarry Smith   ii = 0;
31199c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31209566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31210fd73130SBarry Smith     for (j = 0; j < dof; j++) {
312226fbe8dcSKarl Rupp       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
31239566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
31249c4fc4c7SBarry Smith       ii++;
31259c4fc4c7SBarry Smith     }
31269566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31279c4fc4c7SBarry Smith   }
31289566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
31299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
31309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3131ceb03754SKris Buschelman 
3132511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
31339566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
3134c3d102feSKris Buschelman   } else {
3135ceb03754SKris Buschelman     *newmat = B;
3136c3d102feSKris Buschelman   }
31379c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31389c4fc4c7SBarry Smith }
31399c4fc4c7SBarry Smith 
3140c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3141be1d678aSKris Buschelman 
31429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
31430fd73130SBarry Smith   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3144ceb03754SKris Buschelman   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
31450fd73130SBarry Smith   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
31460fd73130SBarry Smith   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
31470fd73130SBarry Smith   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
31480fd73130SBarry Smith   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
31490298fd71SBarry Smith   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
31500298fd71SBarry Smith   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
31510fd73130SBarry Smith   PetscInt     rstart, cstart, *garray, ii, k;
31520fd73130SBarry Smith   PetscScalar *vals, *ovals;
31530fd73130SBarry Smith 
31540fd73130SBarry Smith   PetscFunctionBegin;
31559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3156d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31570fd73130SBarry Smith     nmax  = PetscMax(nmax, AIJ->ilen[i]);
31580fd73130SBarry Smith     onmax = PetscMax(onmax, OAIJ->ilen[i]);
31590fd73130SBarry Smith     for (j = 0; j < dof; j++) {
31600fd73130SBarry Smith       dnz[dof * i + j] = AIJ->ilen[i];
31610fd73130SBarry Smith       onz[dof * i + j] = OAIJ->ilen[i];
31620fd73130SBarry Smith     }
31630fd73130SBarry Smith   }
31649566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
31659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
31669566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31679566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
31689566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, dof));
31699566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz, onz));
31700fd73130SBarry Smith 
31719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3172d0f46423SBarry Smith   rstart = dof * maij->A->rmap->rstart;
3173d0f46423SBarry Smith   cstart = dof * maij->A->cmap->rstart;
31740fd73130SBarry Smith   garray = mpiaij->garray;
31750fd73130SBarry Smith 
31760fd73130SBarry Smith   ii = rstart;
3177d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31789566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
31799566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
31800fd73130SBarry Smith     for (j = 0; j < dof; j++) {
3181ad540459SPierre Jolivet       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3182ad540459SPierre Jolivet       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
31839566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
31849566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
31850fd73130SBarry Smith       ii++;
31860fd73130SBarry Smith     }
31879566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
31889566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
31890fd73130SBarry Smith   }
31909566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols, oicols));
31910fd73130SBarry Smith 
31929566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
31939566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3194ceb03754SKris Buschelman 
3195511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
31967adad957SLisandro Dalcin     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
31977adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
319826fbe8dcSKarl Rupp 
31999566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
320026fbe8dcSKarl Rupp 
32017adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3202c3d102feSKris Buschelman   } else {
3203ceb03754SKris Buschelman     *newmat = B;
3204c3d102feSKris Buschelman   }
32050fd73130SBarry Smith   PetscFunctionReturn(0);
32060fd73130SBarry Smith }
32070fd73130SBarry Smith 
32089371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
32099e516c8fSBarry Smith   Mat A;
32109e516c8fSBarry Smith 
32119e516c8fSBarry Smith   PetscFunctionBegin;
32129566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32139566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
32149566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
32159e516c8fSBarry Smith   PetscFunctionReturn(0);
32169e516c8fSBarry Smith }
32170fd73130SBarry Smith 
32189371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) {
3219ec626240SStefano Zampini   Mat A;
3220ec626240SStefano Zampini 
3221ec626240SStefano Zampini   PetscFunctionBegin;
32229566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32239566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
32249566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3225ec626240SStefano Zampini   PetscFunctionReturn(0);
3226ec626240SStefano Zampini }
3227ec626240SStefano Zampini 
3228bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3229480dffcfSJed Brown /*@
32300bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32310bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
323211a5261eSBarry Smith   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
323311a5261eSBarry Smith   and `MATMPIAIJ` for distributed matrices.
32340bad9183SKris Buschelman 
3235ff585edeSJed Brown   Collective
3236ff585edeSJed Brown 
3237ff585edeSJed Brown   Input Parameters:
323811a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks
3239ff585edeSJed Brown - dof - the block size (number of components per node)
3240ff585edeSJed Brown 
3241ff585edeSJed Brown   Output Parameter:
324211a5261eSBarry Smith . maij - the new `MATMAIJ` matrix
3243ff585edeSJed Brown 
32440bad9183SKris Buschelman   Operations provided:
324567b8a455SSatish Balay .vb
324611a5261eSBarry Smith     MatMult()
324711a5261eSBarry Smith     MatMultTranspose()
324811a5261eSBarry Smith     MatMultAdd()
324911a5261eSBarry Smith     MatMultTransposeAdd()
325011a5261eSBarry Smith     MatView()
325167b8a455SSatish Balay .ve
32520bad9183SKris Buschelman 
32530bad9183SKris Buschelman   Level: advanced
32540bad9183SKris Buschelman 
325511a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3256ff585edeSJed Brown @*/
32579371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) {
3258b24ad042SBarry Smith   PetscInt  n;
325982b1193eSBarry Smith   Mat       B;
326090f67b22SBarry Smith   PetscBool flg;
3261fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3262fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3263fdc842d1SBarry Smith   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3264fdc842d1SBarry Smith #endif
326582b1193eSBarry Smith 
326682b1193eSBarry Smith   PetscFunctionBegin;
3267fdc842d1SBarry Smith   dof = PetscAbs(dof);
32689566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3269d72c5c08SBarry Smith 
327026fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
327126fbe8dcSKarl Rupp   else {
32729566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3273c248e2fdSStefano Zampini     /* propagate vec type */
32749566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B, A->defaultvectype));
32759566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
32769566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
32779566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
32789566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
32799566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
328026fbe8dcSKarl Rupp 
3281cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3282d72c5c08SBarry Smith 
328390f67b22SBarry Smith     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
328490f67b22SBarry Smith     if (flg) {
3285feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3286feb9fe23SJed Brown 
32879566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATSEQMAIJ));
328826fbe8dcSKarl Rupp 
32890298fd71SBarry Smith       B->ops->setup   = NULL;
32904cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
32910fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
32924222ddf1SHong Zhang 
3293feb9fe23SJed Brown       b      = (Mat_SeqMAIJ *)B->data;
3294b9b97703SBarry Smith       b->dof = dof;
32954cb9d9b8SBarry Smith       b->AIJ = A;
329626fbe8dcSKarl Rupp 
3297d72c5c08SBarry Smith       if (dof == 2) {
3298cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3299cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3300cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3301cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3302bcc973b7SBarry Smith       } else if (dof == 3) {
3303bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3304bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3305bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3306bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3307bcc973b7SBarry Smith       } else if (dof == 4) {
3308bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3309bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3310bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3311bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3312f9fae5adSBarry Smith       } else if (dof == 5) {
3313f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3314f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3315f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3316f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33176cd98798SBarry Smith       } else if (dof == 6) {
33186cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33196cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33206cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33216cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3322ed8eea03SBarry Smith       } else if (dof == 7) {
3323ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3324ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3325ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3326ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
332766ed3db0SBarry Smith       } else if (dof == 8) {
332866ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
332966ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
333066ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
333166ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33322b692628SMatthew Knepley       } else if (dof == 9) {
33332b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33342b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33352b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33362b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3337b9cda22cSBarry Smith       } else if (dof == 10) {
3338b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3339b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3340b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3341b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3342dbdd7285SBarry Smith       } else if (dof == 11) {
3343dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3344dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3345dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3346dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33472f7816d4SBarry Smith       } else if (dof == 16) {
33482f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33492f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33502f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33512f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3352ed1c418cSBarry Smith       } else if (dof == 18) {
3353ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3354ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3355ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3356ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
335782b1193eSBarry Smith       } else {
33586861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33596861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33606861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33616861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
336282b1193eSBarry Smith       }
3363fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
33649566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3365fdc842d1SBarry Smith #endif
33669566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
33679566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3368f4a53059SBarry Smith     } else {
3369f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3370feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3371f4a53059SBarry Smith       IS           from, to;
3372f4a53059SBarry Smith       Vec          gvec;
3373f4a53059SBarry Smith 
33749566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATMPIMAIJ));
337526fbe8dcSKarl Rupp 
33760298fd71SBarry Smith       B->ops->setup   = NULL;
3377d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
33780fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
337926fbe8dcSKarl Rupp 
3380b9b97703SBarry Smith       b      = (Mat_MPIMAIJ *)B->data;
3381b9b97703SBarry Smith       b->dof = dof;
3382b9b97703SBarry Smith       b->A   = A;
338326fbe8dcSKarl Rupp 
33849566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
33859566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
33864cb9d9b8SBarry Smith 
33879566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec, &n));
33889566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
33899566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
33909566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w, dof));
33919566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w, VECSEQ));
3392f4a53059SBarry Smith 
3393f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
33949566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
33959566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3396f4a53059SBarry Smith 
3397f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
33989566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3399f4a53059SBarry Smith 
3400f4a53059SBarry Smith       /* generate the scatter context */
34019566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3402f4a53059SBarry Smith 
34039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34059566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3406f4a53059SBarry Smith 
3407f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34084cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34094cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34104cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
341126fbe8dcSKarl Rupp 
3412fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34139566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3414fdc842d1SBarry Smith #endif
34159566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
34169566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3417f4a53059SBarry Smith     }
34187dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3419ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
34209566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3421fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3422fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3423fdc842d1SBarry Smith     {
3424fdc842d1SBarry Smith       PetscBool flg;
3425fdc842d1SBarry Smith       if (convert) {
34269566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
342748a46eb9SPierre Jolivet         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3428fdc842d1SBarry Smith       }
3429fdc842d1SBarry Smith     }
3430fdc842d1SBarry Smith #endif
3431cd3bbe55SBarry Smith     *maij = B;
3432d72c5c08SBarry Smith   }
343382b1193eSBarry Smith   PetscFunctionReturn(0);
343482b1193eSBarry Smith }
3435