xref: /petsc/src/mat/impls/maij/maij.c (revision 11a5261e40035b7c793f2783a2ba6c7cd4f3b077)
1be1d678aSKris Buschelman 
2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
482b1193eSBarry Smith 
5b350b9acSSatish Balay /*@
6*11a5261eSBarry Smith    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7ff585edeSJed Brown 
8*11a5261eSBarry Smith    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9ff585edeSJed Brown 
10ff585edeSJed Brown    Input Parameter:
11*11a5261eSBarry Smith .  A - the `MATMAIJ` matrix
12ff585edeSJed Brown 
13ff585edeSJed Brown    Output Parameter:
14*11a5261eSBarry Smith .  B - the `MATAIJ` matrix
15ff585edeSJed Brown 
16ff585edeSJed Brown    Level: advanced
17ff585edeSJed Brown 
18*11a5261eSBarry Smith    Note:
19*11a5261eSBarry Smith     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20ff585edeSJed Brown 
21*11a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22ff585edeSJed Brown @*/
239371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) {
24ace3abfcSBarry Smith   PetscBool ismpimaij, isseqmaij;
25b9b97703SBarry Smith 
26b9b97703SBarry Smith   PetscFunctionBegin;
279566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
289566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29b9b97703SBarry Smith   if (ismpimaij) {
30b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
31b9b97703SBarry Smith 
32b9b97703SBarry Smith     *B = b->A;
33b9b97703SBarry Smith   } else if (isseqmaij) {
34b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
35b9b97703SBarry Smith 
36b9b97703SBarry Smith     *B = b->AIJ;
37b9b97703SBarry Smith   } else {
38b9b97703SBarry Smith     *B = A;
39b9b97703SBarry Smith   }
40b9b97703SBarry Smith   PetscFunctionReturn(0);
41b9b97703SBarry Smith }
42b9b97703SBarry Smith 
43480dffcfSJed Brown /*@
44*11a5261eSBarry Smith    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
45ff585edeSJed Brown 
463f9fe445SBarry Smith    Logically Collective
47ff585edeSJed Brown 
48d8d19677SJose E. Roman    Input Parameters:
49*11a5261eSBarry Smith +  A - the `MATMAIJ` matrix
50ff585edeSJed Brown -  dof - the block size for the new matrix
51ff585edeSJed Brown 
52ff585edeSJed Brown    Output Parameter:
53*11a5261eSBarry Smith .  B - the new `MATMAIJ` matrix
54ff585edeSJed Brown 
55ff585edeSJed Brown    Level: advanced
56ff585edeSJed Brown 
57*11a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()`
58ff585edeSJed Brown @*/
599371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) {
600298fd71SBarry Smith   Mat Aij = NULL;
61b9b97703SBarry Smith 
62b9b97703SBarry Smith   PetscFunctionBegin;
63c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A, dof, 2);
649566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A, &Aij));
659566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij, dof, B));
66b9b97703SBarry Smith   PetscFunctionReturn(0);
67b9b97703SBarry Smith }
68b9b97703SBarry Smith 
699371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) {
704cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
7182b1193eSBarry Smith 
7282b1193eSBarry Smith   PetscFunctionBegin;
739566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
749566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
784cb9d9b8SBarry Smith   PetscFunctionReturn(0);
794cb9d9b8SBarry Smith }
804cb9d9b8SBarry Smith 
819371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) {
82356c569eSBarry Smith   PetscFunctionBegin;
83ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
84356c569eSBarry Smith }
85356c569eSBarry Smith 
869371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) {
870fd73130SBarry Smith   Mat B;
880fd73130SBarry Smith 
890fd73130SBarry Smith   PetscFunctionBegin;
909566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
919566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
929566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
930fd73130SBarry Smith   PetscFunctionReturn(0);
940fd73130SBarry Smith }
950fd73130SBarry Smith 
969371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) {
970fd73130SBarry Smith   Mat B;
980fd73130SBarry Smith 
990fd73130SBarry Smith   PetscFunctionBegin;
1009566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1019566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1029566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1030fd73130SBarry Smith   PetscFunctionReturn(0);
1040fd73130SBarry Smith }
1050fd73130SBarry Smith 
1069371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) {
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1084cb9d9b8SBarry Smith 
1094cb9d9b8SBarry Smith   PetscFunctionBegin;
1109566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1119566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1129566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1139566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1149566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1159566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1162e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1199566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
120b9b97703SBarry Smith   PetscFunctionReturn(0);
121b9b97703SBarry Smith }
122b9b97703SBarry Smith 
1230bad9183SKris Buschelman /*MC
124fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1250bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
126*11a5261eSBarry Smith   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
1270bad9183SKris Buschelman 
1280bad9183SKris Buschelman   Operations provided:
12967b8a455SSatish Balay .vb
130*11a5261eSBarry Smith     MatMult()
131*11a5261eSBarry Smith     MatMultTranspose()
132*11a5261eSBarry Smith     MatMultAdd()
133*11a5261eSBarry Smith     MatMultTransposeAdd()
13467b8a455SSatish Balay .ve
1350bad9183SKris Buschelman 
1360bad9183SKris Buschelman   Level: advanced
1370bad9183SKris Buschelman 
138*11a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1390bad9183SKris Buschelman M*/
1400bad9183SKris Buschelman 
1419371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) {
1424cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
14364b52464SHong Zhang   PetscMPIInt  size;
14482b1193eSBarry Smith 
14582b1193eSBarry Smith   PetscFunctionBegin;
1469566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A, &b));
147b0a32e0cSBarry Smith   A->data = (void *)b;
14826fbe8dcSKarl Rupp 
1499566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
15026fbe8dcSKarl Rupp 
151356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
152f4a53059SBarry Smith 
153f4259b30SLisandro Dalcin   b->AIJ  = NULL;
154cd3bbe55SBarry Smith   b->dof  = 0;
155f4259b30SLisandro Dalcin   b->OAIJ = NULL;
156f4259b30SLisandro Dalcin   b->ctx  = NULL;
157f4259b30SLisandro Dalcin   b->w    = NULL;
1589566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
15964b52464SHong Zhang   if (size == 1) {
1609566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
16164b52464SHong Zhang   } else {
1629566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
16364b52464SHong Zhang   }
16432e7c8b0SBarry Smith   A->preallocated = PETSC_TRUE;
16532e7c8b0SBarry Smith   A->assembled    = PETSC_TRUE;
16682b1193eSBarry Smith   PetscFunctionReturn(0);
16782b1193eSBarry Smith }
16882b1193eSBarry Smith 
169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
1714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
172bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
173d2840e09SBarry Smith   const PetscScalar *x, *v;
174d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
175d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
176d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
17782b1193eSBarry Smith 
178bcc973b7SBarry Smith   PetscFunctionBegin;
1799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
181bcc973b7SBarry Smith   idx = a->j;
182bcc973b7SBarry Smith   v   = a->a;
183bcc973b7SBarry Smith   ii  = a->i;
184bcc973b7SBarry Smith 
185bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
186bcc973b7SBarry Smith     jrow = ii[i];
187bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
188bcc973b7SBarry Smith     sum1 = 0.0;
189bcc973b7SBarry Smith     sum2 = 0.0;
19026fbe8dcSKarl Rupp 
191b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
192bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
193bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
194bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
195bcc973b7SBarry Smith       jrow++;
196bcc973b7SBarry Smith     }
197bcc973b7SBarry Smith     y[2 * i]     = sum1;
198bcc973b7SBarry Smith     y[2 * i + 1] = sum2;
199bcc973b7SBarry Smith   }
200bcc973b7SBarry Smith 
2019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
2029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
20482b1193eSBarry Smith   PetscFunctionReturn(0);
20582b1193eSBarry Smith }
206bcc973b7SBarry Smith 
2079371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
2084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
209bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
210d2840e09SBarry Smith   const PetscScalar *x, *v;
211d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
212d2840e09SBarry Smith   PetscInt           n, i;
213d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
21482b1193eSBarry Smith 
215bcc973b7SBarry Smith   PetscFunctionBegin;
2169566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
2179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
219bfec09a0SHong Zhang 
220bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
221bfec09a0SHong Zhang     idx    = a->j + a->i[i];
222bfec09a0SHong Zhang     v      = a->a + a->i[i];
223bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
224bcc973b7SBarry Smith     alpha1 = x[2 * i];
225bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
22626fbe8dcSKarl Rupp     while (n-- > 0) {
22726fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
22826fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2299371c9d4SSatish Balay       idx++;
2309371c9d4SSatish Balay       v++;
23126fbe8dcSKarl Rupp     }
232bcc973b7SBarry Smith   }
2339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
23682b1193eSBarry Smith   PetscFunctionReturn(0);
23782b1193eSBarry Smith }
238bcc973b7SBarry Smith 
2399371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2404cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
241bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
242d2840e09SBarry Smith   const PetscScalar *x, *v;
243d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
244b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
245d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
24682b1193eSBarry Smith 
247bcc973b7SBarry Smith   PetscFunctionBegin;
2489566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
251bcc973b7SBarry Smith   idx = a->j;
252bcc973b7SBarry Smith   v   = a->a;
253bcc973b7SBarry Smith   ii  = a->i;
254bcc973b7SBarry Smith 
255bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
256bcc973b7SBarry Smith     jrow = ii[i];
257bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
258bcc973b7SBarry Smith     sum1 = 0.0;
259bcc973b7SBarry Smith     sum2 = 0.0;
260bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
261bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
262bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
263bcc973b7SBarry Smith       jrow++;
264bcc973b7SBarry Smith     }
265bcc973b7SBarry Smith     y[2 * i] += sum1;
266bcc973b7SBarry Smith     y[2 * i + 1] += sum2;
267bcc973b7SBarry Smith   }
268bcc973b7SBarry Smith 
2699566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
272bcc973b7SBarry Smith   PetscFunctionReturn(0);
27382b1193eSBarry Smith }
2749371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2754cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
276bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
277d2840e09SBarry Smith   const PetscScalar *x, *v;
278d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
279d2840e09SBarry Smith   PetscInt           n, i;
280d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
28182b1193eSBarry Smith 
282bcc973b7SBarry Smith   PetscFunctionBegin;
2839566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
286bfec09a0SHong Zhang 
287bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
288bfec09a0SHong Zhang     idx    = a->j + a->i[i];
289bfec09a0SHong Zhang     v      = a->a + a->i[i];
290bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
291bcc973b7SBarry Smith     alpha1 = x[2 * i];
292bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
29326fbe8dcSKarl Rupp     while (n-- > 0) {
29426fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
29526fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2969371c9d4SSatish Balay       idx++;
2979371c9d4SSatish Balay       v++;
29826fbe8dcSKarl Rupp     }
299bcc973b7SBarry Smith   }
3009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
3019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
303bcc973b7SBarry Smith   PetscFunctionReturn(0);
30482b1193eSBarry Smith }
305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3069371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3074cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
308bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
309d2840e09SBarry Smith   const PetscScalar *x, *v;
310d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
311d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
312d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
31382b1193eSBarry Smith 
314bcc973b7SBarry Smith   PetscFunctionBegin;
3159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
317bcc973b7SBarry Smith   idx = a->j;
318bcc973b7SBarry Smith   v   = a->a;
319bcc973b7SBarry Smith   ii  = a->i;
320bcc973b7SBarry Smith 
321bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
322bcc973b7SBarry Smith     jrow = ii[i];
323bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
324bcc973b7SBarry Smith     sum1 = 0.0;
325bcc973b7SBarry Smith     sum2 = 0.0;
326bcc973b7SBarry Smith     sum3 = 0.0;
32726fbe8dcSKarl Rupp 
328b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
329bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
330bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
331bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
332bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
333bcc973b7SBarry Smith       jrow++;
334bcc973b7SBarry Smith     }
335bcc973b7SBarry Smith     y[3 * i]     = sum1;
336bcc973b7SBarry Smith     y[3 * i + 1] = sum2;
337bcc973b7SBarry Smith     y[3 * i + 2] = sum3;
338bcc973b7SBarry Smith   }
339bcc973b7SBarry Smith 
3409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
3419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
343bcc973b7SBarry Smith   PetscFunctionReturn(0);
344bcc973b7SBarry Smith }
345bcc973b7SBarry Smith 
3469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
348bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
349d2840e09SBarry Smith   const PetscScalar *x, *v;
350d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
351d2840e09SBarry Smith   PetscInt           n, i;
352d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
353bcc973b7SBarry Smith 
354bcc973b7SBarry Smith   PetscFunctionBegin;
3559566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
3569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
358bfec09a0SHong Zhang 
359bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
360bfec09a0SHong Zhang     idx    = a->j + a->i[i];
361bfec09a0SHong Zhang     v      = a->a + a->i[i];
362bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
363bcc973b7SBarry Smith     alpha1 = x[3 * i];
364bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
365bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
366bcc973b7SBarry Smith     while (n-- > 0) {
367bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
368bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
369bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
3709371c9d4SSatish Balay       idx++;
3719371c9d4SSatish Balay       v++;
372bcc973b7SBarry Smith     }
373bcc973b7SBarry Smith   }
3749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
3759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
377bcc973b7SBarry Smith   PetscFunctionReturn(0);
378bcc973b7SBarry Smith }
379bcc973b7SBarry Smith 
3809371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
3814cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
382bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
383d2840e09SBarry Smith   const PetscScalar *x, *v;
384d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
385b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
386d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
387bcc973b7SBarry Smith 
388bcc973b7SBarry Smith   PetscFunctionBegin;
3899566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
3909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
392bcc973b7SBarry Smith   idx = a->j;
393bcc973b7SBarry Smith   v   = a->a;
394bcc973b7SBarry Smith   ii  = a->i;
395bcc973b7SBarry Smith 
396bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
397bcc973b7SBarry Smith     jrow = ii[i];
398bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
399bcc973b7SBarry Smith     sum1 = 0.0;
400bcc973b7SBarry Smith     sum2 = 0.0;
401bcc973b7SBarry Smith     sum3 = 0.0;
402bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
403bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
404bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
405bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
406bcc973b7SBarry Smith       jrow++;
407bcc973b7SBarry Smith     }
408bcc973b7SBarry Smith     y[3 * i] += sum1;
409bcc973b7SBarry Smith     y[3 * i + 1] += sum2;
410bcc973b7SBarry Smith     y[3 * i + 2] += sum3;
411bcc973b7SBarry Smith   }
412bcc973b7SBarry Smith 
4139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
416bcc973b7SBarry Smith   PetscFunctionReturn(0);
417bcc973b7SBarry Smith }
4189371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
4194cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
420bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
421d2840e09SBarry Smith   const PetscScalar *x, *v;
422d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
423d2840e09SBarry Smith   PetscInt           n, i;
424d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
425bcc973b7SBarry Smith 
426bcc973b7SBarry Smith   PetscFunctionBegin;
4279566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
430bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
431bfec09a0SHong Zhang     idx    = a->j + a->i[i];
432bfec09a0SHong Zhang     v      = a->a + a->i[i];
433bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
434bcc973b7SBarry Smith     alpha1 = x[3 * i];
435bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
436bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
437bcc973b7SBarry Smith     while (n-- > 0) {
438bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
439bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
440bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
4419371c9d4SSatish Balay       idx++;
4429371c9d4SSatish Balay       v++;
443bcc973b7SBarry Smith     }
444bcc973b7SBarry Smith   }
4459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
448bcc973b7SBarry Smith   PetscFunctionReturn(0);
449bcc973b7SBarry Smith }
450bcc973b7SBarry Smith 
451bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4529371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
4534cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
454bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
455d2840e09SBarry Smith   const PetscScalar *x, *v;
456d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
457d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
458d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
459bcc973b7SBarry Smith 
460bcc973b7SBarry Smith   PetscFunctionBegin;
4619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4629566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
463bcc973b7SBarry Smith   idx = a->j;
464bcc973b7SBarry Smith   v   = a->a;
465bcc973b7SBarry Smith   ii  = a->i;
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
468bcc973b7SBarry Smith     jrow = ii[i];
469bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
470bcc973b7SBarry Smith     sum1 = 0.0;
471bcc973b7SBarry Smith     sum2 = 0.0;
472bcc973b7SBarry Smith     sum3 = 0.0;
473bcc973b7SBarry Smith     sum4 = 0.0;
474b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
475bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
476bcc973b7SBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
477bcc973b7SBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
478bcc973b7SBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
479bcc973b7SBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
480bcc973b7SBarry Smith       jrow++;
481bcc973b7SBarry Smith     }
482bcc973b7SBarry Smith     y[4 * i]     = sum1;
483bcc973b7SBarry Smith     y[4 * i + 1] = sum2;
484bcc973b7SBarry Smith     y[4 * i + 2] = sum3;
485bcc973b7SBarry Smith     y[4 * i + 3] = sum4;
486bcc973b7SBarry Smith   }
487bcc973b7SBarry Smith 
4889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
4899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
491bcc973b7SBarry Smith   PetscFunctionReturn(0);
492bcc973b7SBarry Smith }
493bcc973b7SBarry Smith 
4949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
4954cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
496bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
497d2840e09SBarry Smith   const PetscScalar *x, *v;
498d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
499d2840e09SBarry Smith   PetscInt           n, i;
500d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
501bcc973b7SBarry Smith 
502bcc973b7SBarry Smith   PetscFunctionBegin;
5039566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
5049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5059566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
506bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
507bfec09a0SHong Zhang     idx    = a->j + a->i[i];
508bfec09a0SHong Zhang     v      = a->a + a->i[i];
509bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
510bcc973b7SBarry Smith     alpha1 = x[4 * i];
511bcc973b7SBarry Smith     alpha2 = x[4 * i + 1];
512bcc973b7SBarry Smith     alpha3 = x[4 * i + 2];
513bcc973b7SBarry Smith     alpha4 = x[4 * i + 3];
514bcc973b7SBarry Smith     while (n-- > 0) {
515bcc973b7SBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
516bcc973b7SBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
517bcc973b7SBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
518bcc973b7SBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5199371c9d4SSatish Balay       idx++;
5209371c9d4SSatish Balay       v++;
521bcc973b7SBarry Smith     }
522bcc973b7SBarry Smith   }
5239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
526bcc973b7SBarry Smith   PetscFunctionReturn(0);
527bcc973b7SBarry Smith }
528bcc973b7SBarry Smith 
5299371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5304cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
531f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
532d2840e09SBarry Smith   const PetscScalar *x, *v;
533d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
534b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
535d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
536f9fae5adSBarry Smith 
537f9fae5adSBarry Smith   PetscFunctionBegin;
5389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
541f9fae5adSBarry Smith   idx = a->j;
542f9fae5adSBarry Smith   v   = a->a;
543f9fae5adSBarry Smith   ii  = a->i;
544f9fae5adSBarry Smith 
545f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
546f9fae5adSBarry Smith     jrow = ii[i];
547f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
548f9fae5adSBarry Smith     sum1 = 0.0;
549f9fae5adSBarry Smith     sum2 = 0.0;
550f9fae5adSBarry Smith     sum3 = 0.0;
551f9fae5adSBarry Smith     sum4 = 0.0;
552f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
553f9fae5adSBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
554f9fae5adSBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
555f9fae5adSBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
556f9fae5adSBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
557f9fae5adSBarry Smith       jrow++;
558f9fae5adSBarry Smith     }
559f9fae5adSBarry Smith     y[4 * i] += sum1;
560f9fae5adSBarry Smith     y[4 * i + 1] += sum2;
561f9fae5adSBarry Smith     y[4 * i + 2] += sum3;
562f9fae5adSBarry Smith     y[4 * i + 3] += sum4;
563f9fae5adSBarry Smith   }
564f9fae5adSBarry Smith 
5659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
568f9fae5adSBarry Smith   PetscFunctionReturn(0);
569bcc973b7SBarry Smith }
5709371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
572f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
573d2840e09SBarry Smith   const PetscScalar *x, *v;
574d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
575d2840e09SBarry Smith   PetscInt           n, i;
576d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
577f9fae5adSBarry Smith 
578f9fae5adSBarry Smith   PetscFunctionBegin;
5799566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
582bfec09a0SHong Zhang 
583f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
584bfec09a0SHong Zhang     idx    = a->j + a->i[i];
585bfec09a0SHong Zhang     v      = a->a + a->i[i];
586f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
587f9fae5adSBarry Smith     alpha1 = x[4 * i];
588f9fae5adSBarry Smith     alpha2 = x[4 * i + 1];
589f9fae5adSBarry Smith     alpha3 = x[4 * i + 2];
590f9fae5adSBarry Smith     alpha4 = x[4 * i + 3];
591f9fae5adSBarry Smith     while (n-- > 0) {
592f9fae5adSBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
593f9fae5adSBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
594f9fae5adSBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
595f9fae5adSBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5969371c9d4SSatish Balay       idx++;
5979371c9d4SSatish Balay       v++;
598f9fae5adSBarry Smith     }
599f9fae5adSBarry Smith   }
6009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
6019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
603f9fae5adSBarry Smith   PetscFunctionReturn(0);
604f9fae5adSBarry Smith }
605f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6066cd98798SBarry Smith 
6079371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
609f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
610d2840e09SBarry Smith   const PetscScalar *x, *v;
611d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
612d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
613d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
614f9fae5adSBarry Smith 
615f9fae5adSBarry Smith   PetscFunctionBegin;
6169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
618f9fae5adSBarry Smith   idx = a->j;
619f9fae5adSBarry Smith   v   = a->a;
620f9fae5adSBarry Smith   ii  = a->i;
621f9fae5adSBarry Smith 
622f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
623f9fae5adSBarry Smith     jrow = ii[i];
624f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
625f9fae5adSBarry Smith     sum1 = 0.0;
626f9fae5adSBarry Smith     sum2 = 0.0;
627f9fae5adSBarry Smith     sum3 = 0.0;
628f9fae5adSBarry Smith     sum4 = 0.0;
629f9fae5adSBarry Smith     sum5 = 0.0;
63026fbe8dcSKarl Rupp 
631b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
632f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
633f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
634f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
635f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
636f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
637f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
638f9fae5adSBarry Smith       jrow++;
639f9fae5adSBarry Smith     }
640f9fae5adSBarry Smith     y[5 * i]     = sum1;
641f9fae5adSBarry Smith     y[5 * i + 1] = sum2;
642f9fae5adSBarry Smith     y[5 * i + 2] = sum3;
643f9fae5adSBarry Smith     y[5 * i + 3] = sum4;
644f9fae5adSBarry Smith     y[5 * i + 4] = sum5;
645f9fae5adSBarry Smith   }
646f9fae5adSBarry Smith 
6479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
6489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
650f9fae5adSBarry Smith   PetscFunctionReturn(0);
651f9fae5adSBarry Smith }
652f9fae5adSBarry Smith 
6539371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6544cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
655f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
656d2840e09SBarry Smith   const PetscScalar *x, *v;
657d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
658d2840e09SBarry Smith   PetscInt           n, i;
659d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
660f9fae5adSBarry Smith 
661f9fae5adSBarry Smith   PetscFunctionBegin;
6629566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
6639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
665bfec09a0SHong Zhang 
666f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
667bfec09a0SHong Zhang     idx    = a->j + a->i[i];
668bfec09a0SHong Zhang     v      = a->a + a->i[i];
669f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
670f9fae5adSBarry Smith     alpha1 = x[5 * i];
671f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
672f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
673f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
674f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
675f9fae5adSBarry Smith     while (n-- > 0) {
676f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
677f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
678f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
679f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
680f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
6819371c9d4SSatish Balay       idx++;
6829371c9d4SSatish Balay       v++;
683f9fae5adSBarry Smith     }
684f9fae5adSBarry Smith   }
6859566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
6869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
688f9fae5adSBarry Smith   PetscFunctionReturn(0);
689f9fae5adSBarry Smith }
690f9fae5adSBarry Smith 
6919371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
6924cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
693f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
694d2840e09SBarry Smith   const PetscScalar *x, *v;
695d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
696b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
697d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
698f9fae5adSBarry Smith 
699f9fae5adSBarry Smith   PetscFunctionBegin;
7009566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
703f9fae5adSBarry Smith   idx = a->j;
704f9fae5adSBarry Smith   v   = a->a;
705f9fae5adSBarry Smith   ii  = a->i;
706f9fae5adSBarry Smith 
707f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
708f9fae5adSBarry Smith     jrow = ii[i];
709f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
710f9fae5adSBarry Smith     sum1 = 0.0;
711f9fae5adSBarry Smith     sum2 = 0.0;
712f9fae5adSBarry Smith     sum3 = 0.0;
713f9fae5adSBarry Smith     sum4 = 0.0;
714f9fae5adSBarry Smith     sum5 = 0.0;
715f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
716f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
717f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
718f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
719f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
720f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
721f9fae5adSBarry Smith       jrow++;
722f9fae5adSBarry Smith     }
723f9fae5adSBarry Smith     y[5 * i] += sum1;
724f9fae5adSBarry Smith     y[5 * i + 1] += sum2;
725f9fae5adSBarry Smith     y[5 * i + 2] += sum3;
726f9fae5adSBarry Smith     y[5 * i + 3] += sum4;
727f9fae5adSBarry Smith     y[5 * i + 4] += sum5;
728f9fae5adSBarry Smith   }
729f9fae5adSBarry Smith 
7309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
733f9fae5adSBarry Smith   PetscFunctionReturn(0);
734f9fae5adSBarry Smith }
735f9fae5adSBarry Smith 
7369371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
7374cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
738f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
739d2840e09SBarry Smith   const PetscScalar *x, *v;
740d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
741d2840e09SBarry Smith   PetscInt           n, i;
742d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
743f9fae5adSBarry Smith 
744f9fae5adSBarry Smith   PetscFunctionBegin;
7459566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
748bfec09a0SHong Zhang 
749f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
750bfec09a0SHong Zhang     idx    = a->j + a->i[i];
751bfec09a0SHong Zhang     v      = a->a + a->i[i];
752f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
753f9fae5adSBarry Smith     alpha1 = x[5 * i];
754f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
755f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
756f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
757f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
758f9fae5adSBarry Smith     while (n-- > 0) {
759f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
760f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
761f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
762f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
763f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
7649371c9d4SSatish Balay       idx++;
7659371c9d4SSatish Balay       v++;
766f9fae5adSBarry Smith     }
767f9fae5adSBarry Smith   }
7689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
771f9fae5adSBarry Smith   PetscFunctionReturn(0);
772bcc973b7SBarry Smith }
773bcc973b7SBarry Smith 
7746cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7759371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
7766cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
7776cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
778d2840e09SBarry Smith   const PetscScalar *x, *v;
779d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
780d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
781d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
7826cd98798SBarry Smith 
7836cd98798SBarry Smith   PetscFunctionBegin;
7849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
7866cd98798SBarry Smith   idx = a->j;
7876cd98798SBarry Smith   v   = a->a;
7886cd98798SBarry Smith   ii  = a->i;
7896cd98798SBarry Smith 
7906cd98798SBarry Smith   for (i = 0; i < m; i++) {
7916cd98798SBarry Smith     jrow = ii[i];
7926cd98798SBarry Smith     n    = ii[i + 1] - jrow;
7936cd98798SBarry Smith     sum1 = 0.0;
7946cd98798SBarry Smith     sum2 = 0.0;
7956cd98798SBarry Smith     sum3 = 0.0;
7966cd98798SBarry Smith     sum4 = 0.0;
7976cd98798SBarry Smith     sum5 = 0.0;
7986cd98798SBarry Smith     sum6 = 0.0;
79926fbe8dcSKarl Rupp 
800b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
8016cd98798SBarry Smith     for (j = 0; j < n; j++) {
8026cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8036cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8046cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8056cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8066cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8076cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8086cd98798SBarry Smith       jrow++;
8096cd98798SBarry Smith     }
8106cd98798SBarry Smith     y[6 * i]     = sum1;
8116cd98798SBarry Smith     y[6 * i + 1] = sum2;
8126cd98798SBarry Smith     y[6 * i + 2] = sum3;
8136cd98798SBarry Smith     y[6 * i + 3] = sum4;
8146cd98798SBarry Smith     y[6 * i + 4] = sum5;
8156cd98798SBarry Smith     y[6 * i + 5] = sum6;
8166cd98798SBarry Smith   }
8176cd98798SBarry Smith 
8189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
8199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8216cd98798SBarry Smith   PetscFunctionReturn(0);
8226cd98798SBarry Smith }
8236cd98798SBarry Smith 
8249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
8256cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8266cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
827d2840e09SBarry Smith   const PetscScalar *x, *v;
828d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
829d2840e09SBarry Smith   PetscInt           n, i;
830d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
8316cd98798SBarry Smith 
8326cd98798SBarry Smith   PetscFunctionBegin;
8339566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
8349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
836bfec09a0SHong Zhang 
8376cd98798SBarry Smith   for (i = 0; i < m; i++) {
838bfec09a0SHong Zhang     idx    = a->j + a->i[i];
839bfec09a0SHong Zhang     v      = a->a + a->i[i];
8406cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
8416cd98798SBarry Smith     alpha1 = x[6 * i];
8426cd98798SBarry Smith     alpha2 = x[6 * i + 1];
8436cd98798SBarry Smith     alpha3 = x[6 * i + 2];
8446cd98798SBarry Smith     alpha4 = x[6 * i + 3];
8456cd98798SBarry Smith     alpha5 = x[6 * i + 4];
8466cd98798SBarry Smith     alpha6 = x[6 * i + 5];
8476cd98798SBarry Smith     while (n-- > 0) {
8486cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
8496cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
8506cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
8516cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
8526cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
8536cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
8549371c9d4SSatish Balay       idx++;
8559371c9d4SSatish Balay       v++;
8566cd98798SBarry Smith     }
8576cd98798SBarry Smith   }
8589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
8599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8616cd98798SBarry Smith   PetscFunctionReturn(0);
8626cd98798SBarry Smith }
8636cd98798SBarry Smith 
8649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
8656cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8666cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
867d2840e09SBarry Smith   const PetscScalar *x, *v;
868d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
869b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
870d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
8716cd98798SBarry Smith 
8726cd98798SBarry Smith   PetscFunctionBegin;
8739566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
8749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
8766cd98798SBarry Smith   idx = a->j;
8776cd98798SBarry Smith   v   = a->a;
8786cd98798SBarry Smith   ii  = a->i;
8796cd98798SBarry Smith 
8806cd98798SBarry Smith   for (i = 0; i < m; i++) {
8816cd98798SBarry Smith     jrow = ii[i];
8826cd98798SBarry Smith     n    = ii[i + 1] - jrow;
8836cd98798SBarry Smith     sum1 = 0.0;
8846cd98798SBarry Smith     sum2 = 0.0;
8856cd98798SBarry Smith     sum3 = 0.0;
8866cd98798SBarry Smith     sum4 = 0.0;
8876cd98798SBarry Smith     sum5 = 0.0;
8886cd98798SBarry Smith     sum6 = 0.0;
8896cd98798SBarry Smith     for (j = 0; j < n; j++) {
8906cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8916cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8926cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8936cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8946cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8956cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8966cd98798SBarry Smith       jrow++;
8976cd98798SBarry Smith     }
8986cd98798SBarry Smith     y[6 * i] += sum1;
8996cd98798SBarry Smith     y[6 * i + 1] += sum2;
9006cd98798SBarry Smith     y[6 * i + 2] += sum3;
9016cd98798SBarry Smith     y[6 * i + 3] += sum4;
9026cd98798SBarry Smith     y[6 * i + 4] += sum5;
9036cd98798SBarry Smith     y[6 * i + 5] += sum6;
9046cd98798SBarry Smith   }
9056cd98798SBarry Smith 
9069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9096cd98798SBarry Smith   PetscFunctionReturn(0);
9106cd98798SBarry Smith }
9116cd98798SBarry Smith 
9129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
9136cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
9146cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
915d2840e09SBarry Smith   const PetscScalar *x, *v;
916d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
917d2840e09SBarry Smith   PetscInt           n, i;
918d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
9196cd98798SBarry Smith 
9206cd98798SBarry Smith   PetscFunctionBegin;
9219566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
9229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
924bfec09a0SHong Zhang 
9256cd98798SBarry Smith   for (i = 0; i < m; i++) {
926bfec09a0SHong Zhang     idx    = a->j + a->i[i];
927bfec09a0SHong Zhang     v      = a->a + a->i[i];
9286cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
9296cd98798SBarry Smith     alpha1 = x[6 * i];
9306cd98798SBarry Smith     alpha2 = x[6 * i + 1];
9316cd98798SBarry Smith     alpha3 = x[6 * i + 2];
9326cd98798SBarry Smith     alpha4 = x[6 * i + 3];
9336cd98798SBarry Smith     alpha5 = x[6 * i + 4];
9346cd98798SBarry Smith     alpha6 = x[6 * i + 5];
9356cd98798SBarry Smith     while (n-- > 0) {
9366cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
9376cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
9386cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
9396cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
9406cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
9416cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
9429371c9d4SSatish Balay       idx++;
9439371c9d4SSatish Balay       v++;
9446cd98798SBarry Smith     }
9456cd98798SBarry Smith   }
9469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9496cd98798SBarry Smith   PetscFunctionReturn(0);
9506cd98798SBarry Smith }
9516cd98798SBarry Smith 
95266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
9539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
954ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
955ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
956d2840e09SBarry Smith   const PetscScalar *x, *v;
957d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
958d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
959d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
960ed8eea03SBarry Smith 
961ed8eea03SBarry Smith   PetscFunctionBegin;
9629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
964ed8eea03SBarry Smith   idx = a->j;
965ed8eea03SBarry Smith   v   = a->a;
966ed8eea03SBarry Smith   ii  = a->i;
967ed8eea03SBarry Smith 
968ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
969ed8eea03SBarry Smith     jrow = ii[i];
970ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
971ed8eea03SBarry Smith     sum1 = 0.0;
972ed8eea03SBarry Smith     sum2 = 0.0;
973ed8eea03SBarry Smith     sum3 = 0.0;
974ed8eea03SBarry Smith     sum4 = 0.0;
975ed8eea03SBarry Smith     sum5 = 0.0;
976ed8eea03SBarry Smith     sum6 = 0.0;
977ed8eea03SBarry Smith     sum7 = 0.0;
97826fbe8dcSKarl Rupp 
979b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
980ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
981ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
982ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
983ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
984ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
985ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
986ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
987ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
988ed8eea03SBarry Smith       jrow++;
989ed8eea03SBarry Smith     }
990ed8eea03SBarry Smith     y[7 * i]     = sum1;
991ed8eea03SBarry Smith     y[7 * i + 1] = sum2;
992ed8eea03SBarry Smith     y[7 * i + 2] = sum3;
993ed8eea03SBarry Smith     y[7 * i + 3] = sum4;
994ed8eea03SBarry Smith     y[7 * i + 4] = sum5;
995ed8eea03SBarry Smith     y[7 * i + 5] = sum6;
996ed8eea03SBarry Smith     y[7 * i + 6] = sum7;
997ed8eea03SBarry Smith   }
998ed8eea03SBarry Smith 
9999566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
10009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1002ed8eea03SBarry Smith   PetscFunctionReturn(0);
1003ed8eea03SBarry Smith }
1004ed8eea03SBarry Smith 
10059371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
1006ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1007ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1008d2840e09SBarry Smith   const PetscScalar *x, *v;
1009d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1010d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1011d2840e09SBarry Smith   PetscInt           n, i;
1012ed8eea03SBarry Smith 
1013ed8eea03SBarry Smith   PetscFunctionBegin;
10149566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
10159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1017ed8eea03SBarry Smith 
1018ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1019ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1020ed8eea03SBarry Smith     v      = a->a + a->i[i];
1021ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1022ed8eea03SBarry Smith     alpha1 = x[7 * i];
1023ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1024ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1025ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1026ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1027ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1028ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1029ed8eea03SBarry Smith     while (n-- > 0) {
1030ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1031ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1032ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1033ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1034ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1035ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1036ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
10379371c9d4SSatish Balay       idx++;
10389371c9d4SSatish Balay       v++;
1039ed8eea03SBarry Smith     }
1040ed8eea03SBarry Smith   }
10419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1044ed8eea03SBarry Smith   PetscFunctionReturn(0);
1045ed8eea03SBarry Smith }
1046ed8eea03SBarry Smith 
10479371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1048ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1049ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1050d2840e09SBarry Smith   const PetscScalar *x, *v;
1051d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1052d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1053ed8eea03SBarry Smith   PetscInt           n, i, jrow, j;
1054ed8eea03SBarry Smith 
1055ed8eea03SBarry Smith   PetscFunctionBegin;
10569566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
10579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1059ed8eea03SBarry Smith   idx = a->j;
1060ed8eea03SBarry Smith   v   = a->a;
1061ed8eea03SBarry Smith   ii  = a->i;
1062ed8eea03SBarry Smith 
1063ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1064ed8eea03SBarry Smith     jrow = ii[i];
1065ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
1066ed8eea03SBarry Smith     sum1 = 0.0;
1067ed8eea03SBarry Smith     sum2 = 0.0;
1068ed8eea03SBarry Smith     sum3 = 0.0;
1069ed8eea03SBarry Smith     sum4 = 0.0;
1070ed8eea03SBarry Smith     sum5 = 0.0;
1071ed8eea03SBarry Smith     sum6 = 0.0;
1072ed8eea03SBarry Smith     sum7 = 0.0;
1073ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
1074ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
1075ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1076ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1077ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1078ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1079ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1080ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1081ed8eea03SBarry Smith       jrow++;
1082ed8eea03SBarry Smith     }
1083ed8eea03SBarry Smith     y[7 * i] += sum1;
1084ed8eea03SBarry Smith     y[7 * i + 1] += sum2;
1085ed8eea03SBarry Smith     y[7 * i + 2] += sum3;
1086ed8eea03SBarry Smith     y[7 * i + 3] += sum4;
1087ed8eea03SBarry Smith     y[7 * i + 4] += sum5;
1088ed8eea03SBarry Smith     y[7 * i + 5] += sum6;
1089ed8eea03SBarry Smith     y[7 * i + 6] += sum7;
1090ed8eea03SBarry Smith   }
1091ed8eea03SBarry Smith 
10929566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1095ed8eea03SBarry Smith   PetscFunctionReturn(0);
1096ed8eea03SBarry Smith }
1097ed8eea03SBarry Smith 
10989371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1099ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1100ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1101d2840e09SBarry Smith   const PetscScalar *x, *v;
1102d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1103d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1104d2840e09SBarry Smith   PetscInt           n, i;
1105ed8eea03SBarry Smith 
1106ed8eea03SBarry Smith   PetscFunctionBegin;
11079566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
11089566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1110ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1111ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1112ed8eea03SBarry Smith     v      = a->a + a->i[i];
1113ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1114ed8eea03SBarry Smith     alpha1 = x[7 * i];
1115ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1116ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1117ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1118ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1119ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1120ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1121ed8eea03SBarry Smith     while (n-- > 0) {
1122ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1123ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1124ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1125ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1126ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1127ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1128ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
11299371c9d4SSatish Balay       idx++;
11309371c9d4SSatish Balay       v++;
1131ed8eea03SBarry Smith     }
1132ed8eea03SBarry Smith   }
11339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1136ed8eea03SBarry Smith   PetscFunctionReturn(0);
1137ed8eea03SBarry Smith }
1138ed8eea03SBarry Smith 
11399371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
114066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
114166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1142d2840e09SBarry Smith   const PetscScalar *x, *v;
1143d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1144d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1145d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
114666ed3db0SBarry Smith 
114766ed3db0SBarry Smith   PetscFunctionBegin;
11489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
115066ed3db0SBarry Smith   idx = a->j;
115166ed3db0SBarry Smith   v   = a->a;
115266ed3db0SBarry Smith   ii  = a->i;
115366ed3db0SBarry Smith 
115466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
115566ed3db0SBarry Smith     jrow = ii[i];
115666ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
115766ed3db0SBarry Smith     sum1 = 0.0;
115866ed3db0SBarry Smith     sum2 = 0.0;
115966ed3db0SBarry Smith     sum3 = 0.0;
116066ed3db0SBarry Smith     sum4 = 0.0;
116166ed3db0SBarry Smith     sum5 = 0.0;
116266ed3db0SBarry Smith     sum6 = 0.0;
116366ed3db0SBarry Smith     sum7 = 0.0;
116466ed3db0SBarry Smith     sum8 = 0.0;
116526fbe8dcSKarl Rupp 
1166b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
116766ed3db0SBarry Smith     for (j = 0; j < n; j++) {
116866ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
116966ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
117066ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
117166ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
117266ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
117366ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
117466ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
117566ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
117666ed3db0SBarry Smith       jrow++;
117766ed3db0SBarry Smith     }
117866ed3db0SBarry Smith     y[8 * i]     = sum1;
117966ed3db0SBarry Smith     y[8 * i + 1] = sum2;
118066ed3db0SBarry Smith     y[8 * i + 2] = sum3;
118166ed3db0SBarry Smith     y[8 * i + 3] = sum4;
118266ed3db0SBarry Smith     y[8 * i + 4] = sum5;
118366ed3db0SBarry Smith     y[8 * i + 5] = sum6;
118466ed3db0SBarry Smith     y[8 * i + 6] = sum7;
118566ed3db0SBarry Smith     y[8 * i + 7] = sum8;
118666ed3db0SBarry Smith   }
118766ed3db0SBarry Smith 
11889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
11899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
119166ed3db0SBarry Smith   PetscFunctionReturn(0);
119266ed3db0SBarry Smith }
119366ed3db0SBarry Smith 
11949371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
119566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
119666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1197d2840e09SBarry Smith   const PetscScalar *x, *v;
1198d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1199d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1200d2840e09SBarry Smith   PetscInt           n, i;
120166ed3db0SBarry Smith 
120266ed3db0SBarry Smith   PetscFunctionBegin;
12039566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
12049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12059566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1206bfec09a0SHong Zhang 
120766ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1208bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1209bfec09a0SHong Zhang     v      = a->a + a->i[i];
121066ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
121166ed3db0SBarry Smith     alpha1 = x[8 * i];
121266ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
121366ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
121466ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
121566ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
121666ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
121766ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
121866ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
121966ed3db0SBarry Smith     while (n-- > 0) {
122066ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
122166ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
122266ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
122366ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
122466ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
122566ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
122666ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
122766ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
12289371c9d4SSatish Balay       idx++;
12299371c9d4SSatish Balay       v++;
123066ed3db0SBarry Smith     }
123166ed3db0SBarry Smith   }
12329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
123566ed3db0SBarry Smith   PetscFunctionReturn(0);
123666ed3db0SBarry Smith }
123766ed3db0SBarry Smith 
12389371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
123966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
124066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1241d2840e09SBarry Smith   const PetscScalar *x, *v;
1242d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1243d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1244b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
124566ed3db0SBarry Smith 
124666ed3db0SBarry Smith   PetscFunctionBegin;
12479566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
12489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
125066ed3db0SBarry Smith   idx = a->j;
125166ed3db0SBarry Smith   v   = a->a;
125266ed3db0SBarry Smith   ii  = a->i;
125366ed3db0SBarry Smith 
125466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
125566ed3db0SBarry Smith     jrow = ii[i];
125666ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
125766ed3db0SBarry Smith     sum1 = 0.0;
125866ed3db0SBarry Smith     sum2 = 0.0;
125966ed3db0SBarry Smith     sum3 = 0.0;
126066ed3db0SBarry Smith     sum4 = 0.0;
126166ed3db0SBarry Smith     sum5 = 0.0;
126266ed3db0SBarry Smith     sum6 = 0.0;
126366ed3db0SBarry Smith     sum7 = 0.0;
126466ed3db0SBarry Smith     sum8 = 0.0;
126566ed3db0SBarry Smith     for (j = 0; j < n; j++) {
126666ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
126766ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
126866ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
126966ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
127066ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
127166ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
127266ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
127366ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
127466ed3db0SBarry Smith       jrow++;
127566ed3db0SBarry Smith     }
127666ed3db0SBarry Smith     y[8 * i] += sum1;
127766ed3db0SBarry Smith     y[8 * i + 1] += sum2;
127866ed3db0SBarry Smith     y[8 * i + 2] += sum3;
127966ed3db0SBarry Smith     y[8 * i + 3] += sum4;
128066ed3db0SBarry Smith     y[8 * i + 4] += sum5;
128166ed3db0SBarry Smith     y[8 * i + 5] += sum6;
128266ed3db0SBarry Smith     y[8 * i + 6] += sum7;
128366ed3db0SBarry Smith     y[8 * i + 7] += sum8;
128466ed3db0SBarry Smith   }
128566ed3db0SBarry Smith 
12869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
128966ed3db0SBarry Smith   PetscFunctionReturn(0);
129066ed3db0SBarry Smith }
129166ed3db0SBarry Smith 
12929371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
129366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
129466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1295d2840e09SBarry Smith   const PetscScalar *x, *v;
1296d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1297d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1298d2840e09SBarry Smith   PetscInt           n, i;
129966ed3db0SBarry Smith 
130066ed3db0SBarry Smith   PetscFunctionBegin;
13019566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
13029566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13039566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
130466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1305bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1306bfec09a0SHong Zhang     v      = a->a + a->i[i];
130766ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
130866ed3db0SBarry Smith     alpha1 = x[8 * i];
130966ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
131066ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
131166ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
131266ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
131366ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
131466ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
131566ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
131666ed3db0SBarry Smith     while (n-- > 0) {
131766ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
131866ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
131966ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
132066ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
132166ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
132266ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
132366ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
132466ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
13259371c9d4SSatish Balay       idx++;
13269371c9d4SSatish Balay       v++;
132766ed3db0SBarry Smith     }
132866ed3db0SBarry Smith   }
13299566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
13322f7816d4SBarry Smith   PetscFunctionReturn(0);
13332f7816d4SBarry Smith }
13342f7816d4SBarry Smith 
13352b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13369371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
13372b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13382b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1339d2840e09SBarry Smith   const PetscScalar *x, *v;
1340d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1341d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1342d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
13432b692628SMatthew Knepley 
13442b692628SMatthew Knepley   PetscFunctionBegin;
13459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
13472b692628SMatthew Knepley   idx = a->j;
13482b692628SMatthew Knepley   v   = a->a;
13492b692628SMatthew Knepley   ii  = a->i;
13502b692628SMatthew Knepley 
13512b692628SMatthew Knepley   for (i = 0; i < m; i++) {
13522b692628SMatthew Knepley     jrow = ii[i];
13532b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
13542b692628SMatthew Knepley     sum1 = 0.0;
13552b692628SMatthew Knepley     sum2 = 0.0;
13562b692628SMatthew Knepley     sum3 = 0.0;
13572b692628SMatthew Knepley     sum4 = 0.0;
13582b692628SMatthew Knepley     sum5 = 0.0;
13592b692628SMatthew Knepley     sum6 = 0.0;
13602b692628SMatthew Knepley     sum7 = 0.0;
13612b692628SMatthew Knepley     sum8 = 0.0;
13622b692628SMatthew Knepley     sum9 = 0.0;
136326fbe8dcSKarl Rupp 
1364b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
13652b692628SMatthew Knepley     for (j = 0; j < n; j++) {
13662b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
13672b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
13682b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
13692b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
13702b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
13712b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
13722b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
13732b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
13742b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
13752b692628SMatthew Knepley       jrow++;
13762b692628SMatthew Knepley     }
13772b692628SMatthew Knepley     y[9 * i]     = sum1;
13782b692628SMatthew Knepley     y[9 * i + 1] = sum2;
13792b692628SMatthew Knepley     y[9 * i + 2] = sum3;
13802b692628SMatthew Knepley     y[9 * i + 3] = sum4;
13812b692628SMatthew Knepley     y[9 * i + 4] = sum5;
13822b692628SMatthew Knepley     y[9 * i + 5] = sum6;
13832b692628SMatthew Knepley     y[9 * i + 6] = sum7;
13842b692628SMatthew Knepley     y[9 * i + 7] = sum8;
13852b692628SMatthew Knepley     y[9 * i + 8] = sum9;
13862b692628SMatthew Knepley   }
13872b692628SMatthew Knepley 
13889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
13899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
13912b692628SMatthew Knepley   PetscFunctionReturn(0);
13922b692628SMatthew Knepley }
13932b692628SMatthew Knepley 
1394b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1395b9cda22cSBarry Smith 
13969371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
13972b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13982b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1399d2840e09SBarry Smith   const PetscScalar *x, *v;
1400d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1401d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1402d2840e09SBarry Smith   PetscInt           n, i;
14032b692628SMatthew Knepley 
14042b692628SMatthew Knepley   PetscFunctionBegin;
14059566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
14069566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14079566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
14082b692628SMatthew Knepley 
14092b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14102b692628SMatthew Knepley     idx    = a->j + a->i[i];
14112b692628SMatthew Knepley     v      = a->a + a->i[i];
14122b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
14132b692628SMatthew Knepley     alpha1 = x[9 * i];
14142b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
14152b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
14162b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
14172b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
14182b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
14192b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
14202b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
14212f6bd0c6SMatthew Knepley     alpha9 = x[9 * i + 8];
14222b692628SMatthew Knepley     while (n-- > 0) {
14232b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
14242b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
14252b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
14262b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
14272b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
14282b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
14292b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
14302b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
14312b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
14329371c9d4SSatish Balay       idx++;
14339371c9d4SSatish Balay       v++;
14342b692628SMatthew Knepley     }
14352b692628SMatthew Knepley   }
14369566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14392b692628SMatthew Knepley   PetscFunctionReturn(0);
14402b692628SMatthew Knepley }
14412b692628SMatthew Knepley 
14429371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
14432b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14442b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1445d2840e09SBarry Smith   const PetscScalar *x, *v;
1446d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1447d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1448b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
14492b692628SMatthew Knepley 
14502b692628SMatthew Knepley   PetscFunctionBegin;
14519566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
14529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
14542b692628SMatthew Knepley   idx = a->j;
14552b692628SMatthew Knepley   v   = a->a;
14562b692628SMatthew Knepley   ii  = a->i;
14572b692628SMatthew Knepley 
14582b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14592b692628SMatthew Knepley     jrow = ii[i];
14602b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
14612b692628SMatthew Knepley     sum1 = 0.0;
14622b692628SMatthew Knepley     sum2 = 0.0;
14632b692628SMatthew Knepley     sum3 = 0.0;
14642b692628SMatthew Knepley     sum4 = 0.0;
14652b692628SMatthew Knepley     sum5 = 0.0;
14662b692628SMatthew Knepley     sum6 = 0.0;
14672b692628SMatthew Knepley     sum7 = 0.0;
14682b692628SMatthew Knepley     sum8 = 0.0;
14692b692628SMatthew Knepley     sum9 = 0.0;
14702b692628SMatthew Knepley     for (j = 0; j < n; j++) {
14712b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
14722b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
14732b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
14742b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
14752b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
14762b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
14772b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
14782b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
14792b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
14802b692628SMatthew Knepley       jrow++;
14812b692628SMatthew Knepley     }
14822b692628SMatthew Knepley     y[9 * i] += sum1;
14832b692628SMatthew Knepley     y[9 * i + 1] += sum2;
14842b692628SMatthew Knepley     y[9 * i + 2] += sum3;
14852b692628SMatthew Knepley     y[9 * i + 3] += sum4;
14862b692628SMatthew Knepley     y[9 * i + 4] += sum5;
14872b692628SMatthew Knepley     y[9 * i + 5] += sum6;
14882b692628SMatthew Knepley     y[9 * i + 6] += sum7;
14892b692628SMatthew Knepley     y[9 * i + 7] += sum8;
14902b692628SMatthew Knepley     y[9 * i + 8] += sum9;
14912b692628SMatthew Knepley   }
14922b692628SMatthew Knepley 
14939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
14962b692628SMatthew Knepley   PetscFunctionReturn(0);
14972b692628SMatthew Knepley }
14982b692628SMatthew Knepley 
14999371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
15002b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
15012b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1502d2840e09SBarry Smith   const PetscScalar *x, *v;
1503d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1504d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1505d2840e09SBarry Smith   PetscInt           n, i;
15062b692628SMatthew Knepley 
15072b692628SMatthew Knepley   PetscFunctionBegin;
15089566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
15099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
15112b692628SMatthew Knepley   for (i = 0; i < m; i++) {
15122b692628SMatthew Knepley     idx    = a->j + a->i[i];
15132b692628SMatthew Knepley     v      = a->a + a->i[i];
15142b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
15152b692628SMatthew Knepley     alpha1 = x[9 * i];
15162b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
15172b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
15182b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
15192b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
15202b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
15212b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
15222b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
15232b692628SMatthew Knepley     alpha9 = x[9 * i + 8];
15242b692628SMatthew Knepley     while (n-- > 0) {
15252b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
15262b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
15272b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
15282b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
15292b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
15302b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
15312b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
15322b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
15332b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
15349371c9d4SSatish Balay       idx++;
15359371c9d4SSatish Balay       v++;
15362b692628SMatthew Knepley     }
15372b692628SMatthew Knepley   }
15389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15412b692628SMatthew Knepley   PetscFunctionReturn(0);
15422b692628SMatthew Knepley }
15439371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1544b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1545b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1546d2840e09SBarry Smith   const PetscScalar *x, *v;
1547d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1548d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1549d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1550b9cda22cSBarry Smith 
1551b9cda22cSBarry Smith   PetscFunctionBegin;
15529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1554b9cda22cSBarry Smith   idx = a->j;
1555b9cda22cSBarry Smith   v   = a->a;
1556b9cda22cSBarry Smith   ii  = a->i;
1557b9cda22cSBarry Smith 
1558b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1559b9cda22cSBarry Smith     jrow  = ii[i];
1560b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1561b9cda22cSBarry Smith     sum1  = 0.0;
1562b9cda22cSBarry Smith     sum2  = 0.0;
1563b9cda22cSBarry Smith     sum3  = 0.0;
1564b9cda22cSBarry Smith     sum4  = 0.0;
1565b9cda22cSBarry Smith     sum5  = 0.0;
1566b9cda22cSBarry Smith     sum6  = 0.0;
1567b9cda22cSBarry Smith     sum7  = 0.0;
1568b9cda22cSBarry Smith     sum8  = 0.0;
1569b9cda22cSBarry Smith     sum9  = 0.0;
1570b9cda22cSBarry Smith     sum10 = 0.0;
157126fbe8dcSKarl Rupp 
1572b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
1573b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1574b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1575b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1576b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1577b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1578b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1579b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1580b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1581b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1582b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1583b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1584b9cda22cSBarry Smith       jrow++;
1585b9cda22cSBarry Smith     }
1586b9cda22cSBarry Smith     y[10 * i]     = sum1;
1587b9cda22cSBarry Smith     y[10 * i + 1] = sum2;
1588b9cda22cSBarry Smith     y[10 * i + 2] = sum3;
1589b9cda22cSBarry Smith     y[10 * i + 3] = sum4;
1590b9cda22cSBarry Smith     y[10 * i + 4] = sum5;
1591b9cda22cSBarry Smith     y[10 * i + 5] = sum6;
1592b9cda22cSBarry Smith     y[10 * i + 6] = sum7;
1593b9cda22cSBarry Smith     y[10 * i + 7] = sum8;
1594b9cda22cSBarry Smith     y[10 * i + 8] = sum9;
1595b9cda22cSBarry Smith     y[10 * i + 9] = sum10;
1596b9cda22cSBarry Smith   }
1597b9cda22cSBarry Smith 
15989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
15999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1601b9cda22cSBarry Smith   PetscFunctionReturn(0);
1602b9cda22cSBarry Smith }
1603b9cda22cSBarry Smith 
16049371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1605b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1606b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1607d2840e09SBarry Smith   const PetscScalar *x, *v;
1608d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1610b9cda22cSBarry Smith   PetscInt           n, i, jrow, j;
1611b9cda22cSBarry Smith 
1612b9cda22cSBarry Smith   PetscFunctionBegin;
16139566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
16149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1616b9cda22cSBarry Smith   idx = a->j;
1617b9cda22cSBarry Smith   v   = a->a;
1618b9cda22cSBarry Smith   ii  = a->i;
1619b9cda22cSBarry Smith 
1620b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1621b9cda22cSBarry Smith     jrow  = ii[i];
1622b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1623b9cda22cSBarry Smith     sum1  = 0.0;
1624b9cda22cSBarry Smith     sum2  = 0.0;
1625b9cda22cSBarry Smith     sum3  = 0.0;
1626b9cda22cSBarry Smith     sum4  = 0.0;
1627b9cda22cSBarry Smith     sum5  = 0.0;
1628b9cda22cSBarry Smith     sum6  = 0.0;
1629b9cda22cSBarry Smith     sum7  = 0.0;
1630b9cda22cSBarry Smith     sum8  = 0.0;
1631b9cda22cSBarry Smith     sum9  = 0.0;
1632b9cda22cSBarry Smith     sum10 = 0.0;
1633b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1634b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1635b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1636b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1637b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1638b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1639b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1640b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1641b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1642b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1643b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1644b9cda22cSBarry Smith       jrow++;
1645b9cda22cSBarry Smith     }
1646b9cda22cSBarry Smith     y[10 * i] += sum1;
1647b9cda22cSBarry Smith     y[10 * i + 1] += sum2;
1648b9cda22cSBarry Smith     y[10 * i + 2] += sum3;
1649b9cda22cSBarry Smith     y[10 * i + 3] += sum4;
1650b9cda22cSBarry Smith     y[10 * i + 4] += sum5;
1651b9cda22cSBarry Smith     y[10 * i + 5] += sum6;
1652b9cda22cSBarry Smith     y[10 * i + 6] += sum7;
1653b9cda22cSBarry Smith     y[10 * i + 7] += sum8;
1654b9cda22cSBarry Smith     y[10 * i + 8] += sum9;
1655b9cda22cSBarry Smith     y[10 * i + 9] += sum10;
1656b9cda22cSBarry Smith   }
1657b9cda22cSBarry Smith 
16589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
16599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1661b9cda22cSBarry Smith   PetscFunctionReturn(0);
1662b9cda22cSBarry Smith }
1663b9cda22cSBarry Smith 
16649371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1665b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1666b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1667d2840e09SBarry Smith   const PetscScalar *x, *v;
1668d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1669d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1670d2840e09SBarry Smith   PetscInt           n, i;
1671b9cda22cSBarry Smith 
1672b9cda22cSBarry Smith   PetscFunctionBegin;
16739566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
16749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1676b9cda22cSBarry Smith 
1677b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1678b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1679b9cda22cSBarry Smith     v       = a->a + a->i[i];
1680b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1681e29fdc22SBarry Smith     alpha1  = x[10 * i];
1682e29fdc22SBarry Smith     alpha2  = x[10 * i + 1];
1683e29fdc22SBarry Smith     alpha3  = x[10 * i + 2];
1684e29fdc22SBarry Smith     alpha4  = x[10 * i + 3];
1685e29fdc22SBarry Smith     alpha5  = x[10 * i + 4];
1686e29fdc22SBarry Smith     alpha6  = x[10 * i + 5];
1687e29fdc22SBarry Smith     alpha7  = x[10 * i + 6];
1688e29fdc22SBarry Smith     alpha8  = x[10 * i + 7];
1689e29fdc22SBarry Smith     alpha9  = x[10 * i + 8];
1690b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1691b9cda22cSBarry Smith     while (n-- > 0) {
1692e29fdc22SBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1693e29fdc22SBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1694e29fdc22SBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1695e29fdc22SBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1696e29fdc22SBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1697e29fdc22SBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1698e29fdc22SBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1699e29fdc22SBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1700e29fdc22SBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1701b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17029371c9d4SSatish Balay       idx++;
17039371c9d4SSatish Balay       v++;
1704b9cda22cSBarry Smith     }
1705b9cda22cSBarry Smith   }
17069566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1709b9cda22cSBarry Smith   PetscFunctionReturn(0);
1710b9cda22cSBarry Smith }
1711b9cda22cSBarry Smith 
17129371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1713b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1714b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1715d2840e09SBarry Smith   const PetscScalar *x, *v;
1716d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1717d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1718d2840e09SBarry Smith   PetscInt           n, i;
1719b9cda22cSBarry Smith 
1720b9cda22cSBarry Smith   PetscFunctionBegin;
17219566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
17229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1724b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1725b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1726b9cda22cSBarry Smith     v       = a->a + a->i[i];
1727b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1728b9cda22cSBarry Smith     alpha1  = x[10 * i];
1729b9cda22cSBarry Smith     alpha2  = x[10 * i + 1];
1730b9cda22cSBarry Smith     alpha3  = x[10 * i + 2];
1731b9cda22cSBarry Smith     alpha4  = x[10 * i + 3];
1732b9cda22cSBarry Smith     alpha5  = x[10 * i + 4];
1733b9cda22cSBarry Smith     alpha6  = x[10 * i + 5];
1734b9cda22cSBarry Smith     alpha7  = x[10 * i + 6];
1735b9cda22cSBarry Smith     alpha8  = x[10 * i + 7];
1736b9cda22cSBarry Smith     alpha9  = x[10 * i + 8];
1737b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1738b9cda22cSBarry Smith     while (n-- > 0) {
1739b9cda22cSBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1740b9cda22cSBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1741b9cda22cSBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1742b9cda22cSBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1743b9cda22cSBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1744b9cda22cSBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1745b9cda22cSBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1746b9cda22cSBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1747b9cda22cSBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1748b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17499371c9d4SSatish Balay       idx++;
17509371c9d4SSatish Balay       v++;
1751b9cda22cSBarry Smith     }
1752b9cda22cSBarry Smith   }
17539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1756b9cda22cSBarry Smith   PetscFunctionReturn(0);
1757b9cda22cSBarry Smith }
1758b9cda22cSBarry Smith 
17592f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
17609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1761dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1762dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1763d2840e09SBarry Smith   const PetscScalar *x, *v;
1764d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1765d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1766d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1767dbdd7285SBarry Smith 
1768dbdd7285SBarry Smith   PetscFunctionBegin;
17699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1771dbdd7285SBarry Smith   idx = a->j;
1772dbdd7285SBarry Smith   v   = a->a;
1773dbdd7285SBarry Smith   ii  = a->i;
1774dbdd7285SBarry Smith 
1775dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1776dbdd7285SBarry Smith     jrow  = ii[i];
1777dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1778dbdd7285SBarry Smith     sum1  = 0.0;
1779dbdd7285SBarry Smith     sum2  = 0.0;
1780dbdd7285SBarry Smith     sum3  = 0.0;
1781dbdd7285SBarry Smith     sum4  = 0.0;
1782dbdd7285SBarry Smith     sum5  = 0.0;
1783dbdd7285SBarry Smith     sum6  = 0.0;
1784dbdd7285SBarry Smith     sum7  = 0.0;
1785dbdd7285SBarry Smith     sum8  = 0.0;
1786dbdd7285SBarry Smith     sum9  = 0.0;
1787dbdd7285SBarry Smith     sum10 = 0.0;
1788dbdd7285SBarry Smith     sum11 = 0.0;
178926fbe8dcSKarl Rupp 
1790dbdd7285SBarry Smith     nonzerorow += (n > 0);
1791dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1792dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1793dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1794dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1795dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1796dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1797dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1798dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1799dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1800dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1801dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1802dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1803dbdd7285SBarry Smith       jrow++;
1804dbdd7285SBarry Smith     }
1805dbdd7285SBarry Smith     y[11 * i]      = sum1;
1806dbdd7285SBarry Smith     y[11 * i + 1]  = sum2;
1807dbdd7285SBarry Smith     y[11 * i + 2]  = sum3;
1808dbdd7285SBarry Smith     y[11 * i + 3]  = sum4;
1809dbdd7285SBarry Smith     y[11 * i + 4]  = sum5;
1810dbdd7285SBarry Smith     y[11 * i + 5]  = sum6;
1811dbdd7285SBarry Smith     y[11 * i + 6]  = sum7;
1812dbdd7285SBarry Smith     y[11 * i + 7]  = sum8;
1813dbdd7285SBarry Smith     y[11 * i + 8]  = sum9;
1814dbdd7285SBarry Smith     y[11 * i + 9]  = sum10;
1815dbdd7285SBarry Smith     y[11 * i + 10] = sum11;
1816dbdd7285SBarry Smith   }
1817dbdd7285SBarry Smith 
18189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
18199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1821dbdd7285SBarry Smith   PetscFunctionReturn(0);
1822dbdd7285SBarry Smith }
1823dbdd7285SBarry Smith 
18249371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1825dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1826dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1827d2840e09SBarry Smith   const PetscScalar *x, *v;
1828d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1829d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1830dbdd7285SBarry Smith   PetscInt           n, i, jrow, j;
1831dbdd7285SBarry Smith 
1832dbdd7285SBarry Smith   PetscFunctionBegin;
18339566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
18349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1836dbdd7285SBarry Smith   idx = a->j;
1837dbdd7285SBarry Smith   v   = a->a;
1838dbdd7285SBarry Smith   ii  = a->i;
1839dbdd7285SBarry Smith 
1840dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1841dbdd7285SBarry Smith     jrow  = ii[i];
1842dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1843dbdd7285SBarry Smith     sum1  = 0.0;
1844dbdd7285SBarry Smith     sum2  = 0.0;
1845dbdd7285SBarry Smith     sum3  = 0.0;
1846dbdd7285SBarry Smith     sum4  = 0.0;
1847dbdd7285SBarry Smith     sum5  = 0.0;
1848dbdd7285SBarry Smith     sum6  = 0.0;
1849dbdd7285SBarry Smith     sum7  = 0.0;
1850dbdd7285SBarry Smith     sum8  = 0.0;
1851dbdd7285SBarry Smith     sum9  = 0.0;
1852dbdd7285SBarry Smith     sum10 = 0.0;
1853dbdd7285SBarry Smith     sum11 = 0.0;
1854dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1855dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1856dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1857dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1858dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1859dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1860dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1861dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1862dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1863dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1864dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1865dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1866dbdd7285SBarry Smith       jrow++;
1867dbdd7285SBarry Smith     }
1868dbdd7285SBarry Smith     y[11 * i] += sum1;
1869dbdd7285SBarry Smith     y[11 * i + 1] += sum2;
1870dbdd7285SBarry Smith     y[11 * i + 2] += sum3;
1871dbdd7285SBarry Smith     y[11 * i + 3] += sum4;
1872dbdd7285SBarry Smith     y[11 * i + 4] += sum5;
1873dbdd7285SBarry Smith     y[11 * i + 5] += sum6;
1874dbdd7285SBarry Smith     y[11 * i + 6] += sum7;
1875dbdd7285SBarry Smith     y[11 * i + 7] += sum8;
1876dbdd7285SBarry Smith     y[11 * i + 8] += sum9;
1877dbdd7285SBarry Smith     y[11 * i + 9] += sum10;
1878dbdd7285SBarry Smith     y[11 * i + 10] += sum11;
1879dbdd7285SBarry Smith   }
1880dbdd7285SBarry Smith 
18819566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
18829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1884dbdd7285SBarry Smith   PetscFunctionReturn(0);
1885dbdd7285SBarry Smith }
1886dbdd7285SBarry Smith 
18879371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1888dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1889dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1890d2840e09SBarry Smith   const PetscScalar *x, *v;
1891d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1892d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1893d2840e09SBarry Smith   PetscInt           n, i;
1894dbdd7285SBarry Smith 
1895dbdd7285SBarry Smith   PetscFunctionBegin;
18969566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
18979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1899dbdd7285SBarry Smith 
1900dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1901dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1902dbdd7285SBarry Smith     v       = a->a + a->i[i];
1903dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1904dbdd7285SBarry Smith     alpha1  = x[11 * i];
1905dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1906dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1907dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1908dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1909dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1910dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1911dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1912dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1913dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1914dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1915dbdd7285SBarry Smith     while (n-- > 0) {
1916dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1917dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1918dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1919dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1920dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1921dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1922dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1923dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1924dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1925dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1926dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19279371c9d4SSatish Balay       idx++;
19289371c9d4SSatish Balay       v++;
1929dbdd7285SBarry Smith     }
1930dbdd7285SBarry Smith   }
19319566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1934dbdd7285SBarry Smith   PetscFunctionReturn(0);
1935dbdd7285SBarry Smith }
1936dbdd7285SBarry Smith 
19379371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1938dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1939dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1940d2840e09SBarry Smith   const PetscScalar *x, *v;
1941d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1942d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1943d2840e09SBarry Smith   PetscInt           n, i;
1944dbdd7285SBarry Smith 
1945dbdd7285SBarry Smith   PetscFunctionBegin;
19469566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
19479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1949dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1950dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1951dbdd7285SBarry Smith     v       = a->a + a->i[i];
1952dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1953dbdd7285SBarry Smith     alpha1  = x[11 * i];
1954dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1955dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1956dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1957dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1958dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1959dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1960dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1961dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1962dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1963dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1964dbdd7285SBarry Smith     while (n-- > 0) {
1965dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1966dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1967dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1968dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1969dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1970dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1971dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1972dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1973dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1974dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1975dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19769371c9d4SSatish Balay       idx++;
19779371c9d4SSatish Balay       v++;
1978dbdd7285SBarry Smith     }
1979dbdd7285SBarry Smith   }
19809566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1983dbdd7285SBarry Smith   PetscFunctionReturn(0);
1984dbdd7285SBarry Smith }
1985dbdd7285SBarry Smith 
1986dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
19879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
19882f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
19892f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1990d2840e09SBarry Smith   const PetscScalar *x, *v;
1991d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
19922f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1993d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1994d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
19952f7816d4SBarry Smith 
19962f7816d4SBarry Smith   PetscFunctionBegin;
19979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
19992f7816d4SBarry Smith   idx = a->j;
20002f7816d4SBarry Smith   v   = a->a;
20012f7816d4SBarry Smith   ii  = a->i;
20022f7816d4SBarry Smith 
20032f7816d4SBarry Smith   for (i = 0; i < m; i++) {
20042f7816d4SBarry Smith     jrow  = ii[i];
20052f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
20062f7816d4SBarry Smith     sum1  = 0.0;
20072f7816d4SBarry Smith     sum2  = 0.0;
20082f7816d4SBarry Smith     sum3  = 0.0;
20092f7816d4SBarry Smith     sum4  = 0.0;
20102f7816d4SBarry Smith     sum5  = 0.0;
20112f7816d4SBarry Smith     sum6  = 0.0;
20122f7816d4SBarry Smith     sum7  = 0.0;
20132f7816d4SBarry Smith     sum8  = 0.0;
20142f7816d4SBarry Smith     sum9  = 0.0;
20152f7816d4SBarry Smith     sum10 = 0.0;
20162f7816d4SBarry Smith     sum11 = 0.0;
20172f7816d4SBarry Smith     sum12 = 0.0;
20182f7816d4SBarry Smith     sum13 = 0.0;
20192f7816d4SBarry Smith     sum14 = 0.0;
20202f7816d4SBarry Smith     sum15 = 0.0;
20212f7816d4SBarry Smith     sum16 = 0.0;
202226fbe8dcSKarl Rupp 
2023b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
20242f7816d4SBarry Smith     for (j = 0; j < n; j++) {
20252f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
20262f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
20272f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
20282f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
20292f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
20302f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
20312f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
20322f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
20332f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
20342f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
20352f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
20362f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
20372f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
20382f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
20392f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
20402f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
20412f7816d4SBarry Smith       jrow++;
20422f7816d4SBarry Smith     }
20432f7816d4SBarry Smith     y[16 * i]      = sum1;
20442f7816d4SBarry Smith     y[16 * i + 1]  = sum2;
20452f7816d4SBarry Smith     y[16 * i + 2]  = sum3;
20462f7816d4SBarry Smith     y[16 * i + 3]  = sum4;
20472f7816d4SBarry Smith     y[16 * i + 4]  = sum5;
20482f7816d4SBarry Smith     y[16 * i + 5]  = sum6;
20492f7816d4SBarry Smith     y[16 * i + 6]  = sum7;
20502f7816d4SBarry Smith     y[16 * i + 7]  = sum8;
20512f7816d4SBarry Smith     y[16 * i + 8]  = sum9;
20522f7816d4SBarry Smith     y[16 * i + 9]  = sum10;
20532f7816d4SBarry Smith     y[16 * i + 10] = sum11;
20542f7816d4SBarry Smith     y[16 * i + 11] = sum12;
20552f7816d4SBarry Smith     y[16 * i + 12] = sum13;
20562f7816d4SBarry Smith     y[16 * i + 13] = sum14;
20572f7816d4SBarry Smith     y[16 * i + 14] = sum15;
20582f7816d4SBarry Smith     y[16 * i + 15] = sum16;
20592f7816d4SBarry Smith   }
20602f7816d4SBarry Smith 
20619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
20629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
20639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
20642f7816d4SBarry Smith   PetscFunctionReturn(0);
20652f7816d4SBarry Smith }
20662f7816d4SBarry Smith 
20679371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
20682f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
20692f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2070d2840e09SBarry Smith   const PetscScalar *x, *v;
2071d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
20722f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2073d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2074d2840e09SBarry Smith   PetscInt           n, i;
20752f7816d4SBarry Smith 
20762f7816d4SBarry Smith   PetscFunctionBegin;
20779566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
20789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
20799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2080bfec09a0SHong Zhang 
20812f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2082bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2083bfec09a0SHong Zhang     v       = a->a + a->i[i];
20842f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
20852f7816d4SBarry Smith     alpha1  = x[16 * i];
20862f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
20872f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
20882f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
20892f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
20902f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
20912f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
20922f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
20932f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
20942f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
20952f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
20962f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
20972f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
20982f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
20992f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
21002f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
21012f7816d4SBarry Smith     while (n-- > 0) {
21022f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
21032f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
21042f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
21052f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
21062f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
21072f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
21082f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
21092f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
21102f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
21112f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
21122f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
21132f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
21142f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
21152f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
21162f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
21172f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
21189371c9d4SSatish Balay       idx++;
21199371c9d4SSatish Balay       v++;
21202f7816d4SBarry Smith     }
21212f7816d4SBarry Smith   }
21229566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
21239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
21249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21252f7816d4SBarry Smith   PetscFunctionReturn(0);
21262f7816d4SBarry Smith }
21272f7816d4SBarry Smith 
21289371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
21292f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
21302f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2131d2840e09SBarry Smith   const PetscScalar *x, *v;
2132d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21332f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2134d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2135b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
21362f7816d4SBarry Smith 
21372f7816d4SBarry Smith   PetscFunctionBegin;
21389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
21399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
21412f7816d4SBarry Smith   idx = a->j;
21422f7816d4SBarry Smith   v   = a->a;
21432f7816d4SBarry Smith   ii  = a->i;
21442f7816d4SBarry Smith 
21452f7816d4SBarry Smith   for (i = 0; i < m; i++) {
21462f7816d4SBarry Smith     jrow  = ii[i];
21472f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
21482f7816d4SBarry Smith     sum1  = 0.0;
21492f7816d4SBarry Smith     sum2  = 0.0;
21502f7816d4SBarry Smith     sum3  = 0.0;
21512f7816d4SBarry Smith     sum4  = 0.0;
21522f7816d4SBarry Smith     sum5  = 0.0;
21532f7816d4SBarry Smith     sum6  = 0.0;
21542f7816d4SBarry Smith     sum7  = 0.0;
21552f7816d4SBarry Smith     sum8  = 0.0;
21562f7816d4SBarry Smith     sum9  = 0.0;
21572f7816d4SBarry Smith     sum10 = 0.0;
21582f7816d4SBarry Smith     sum11 = 0.0;
21592f7816d4SBarry Smith     sum12 = 0.0;
21602f7816d4SBarry Smith     sum13 = 0.0;
21612f7816d4SBarry Smith     sum14 = 0.0;
21622f7816d4SBarry Smith     sum15 = 0.0;
21632f7816d4SBarry Smith     sum16 = 0.0;
21642f7816d4SBarry Smith     for (j = 0; j < n; j++) {
21652f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
21662f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
21672f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
21682f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
21692f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
21702f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
21712f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
21722f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
21732f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
21742f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
21752f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
21762f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
21772f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
21782f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
21792f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
21802f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
21812f7816d4SBarry Smith       jrow++;
21822f7816d4SBarry Smith     }
21832f7816d4SBarry Smith     y[16 * i] += sum1;
21842f7816d4SBarry Smith     y[16 * i + 1] += sum2;
21852f7816d4SBarry Smith     y[16 * i + 2] += sum3;
21862f7816d4SBarry Smith     y[16 * i + 3] += sum4;
21872f7816d4SBarry Smith     y[16 * i + 4] += sum5;
21882f7816d4SBarry Smith     y[16 * i + 5] += sum6;
21892f7816d4SBarry Smith     y[16 * i + 6] += sum7;
21902f7816d4SBarry Smith     y[16 * i + 7] += sum8;
21912f7816d4SBarry Smith     y[16 * i + 8] += sum9;
21922f7816d4SBarry Smith     y[16 * i + 9] += sum10;
21932f7816d4SBarry Smith     y[16 * i + 10] += sum11;
21942f7816d4SBarry Smith     y[16 * i + 11] += sum12;
21952f7816d4SBarry Smith     y[16 * i + 12] += sum13;
21962f7816d4SBarry Smith     y[16 * i + 13] += sum14;
21972f7816d4SBarry Smith     y[16 * i + 14] += sum15;
21982f7816d4SBarry Smith     y[16 * i + 15] += sum16;
21992f7816d4SBarry Smith   }
22002f7816d4SBarry Smith 
22019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
22042f7816d4SBarry Smith   PetscFunctionReturn(0);
22052f7816d4SBarry Smith }
22062f7816d4SBarry Smith 
22079371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
22082f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
22092f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2210d2840e09SBarry Smith   const PetscScalar *x, *v;
2211d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
22122f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2213d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2214d2840e09SBarry Smith   PetscInt           n, i;
22152f7816d4SBarry Smith 
22162f7816d4SBarry Smith   PetscFunctionBegin;
22179566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
22189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
22202f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2221bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2222bfec09a0SHong Zhang     v       = a->a + a->i[i];
22232f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
22242f7816d4SBarry Smith     alpha1  = x[16 * i];
22252f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
22262f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
22272f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
22282f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
22292f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
22302f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
22312f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
22322f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
22332f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
22342f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
22352f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
22362f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
22372f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
22382f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
22392f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
22402f7816d4SBarry Smith     while (n-- > 0) {
22412f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
22422f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
22432f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
22442f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
22452f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
22462f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
22472f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
22482f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
22492f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
22502f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
22512f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
22522f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
22532f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
22542f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
22552f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
22562f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
22579371c9d4SSatish Balay       idx++;
22589371c9d4SSatish Balay       v++;
22592f7816d4SBarry Smith     }
22602f7816d4SBarry Smith   }
22619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
226466ed3db0SBarry Smith   PetscFunctionReturn(0);
226566ed3db0SBarry Smith }
226666ed3db0SBarry Smith 
2267ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
22689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2269ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2270ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2271d2840e09SBarry Smith   const PetscScalar *x, *v;
2272d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2273ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2274d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2275d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
2276ed1c418cSBarry Smith 
2277ed1c418cSBarry Smith   PetscFunctionBegin;
22789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2280ed1c418cSBarry Smith   idx = a->j;
2281ed1c418cSBarry Smith   v   = a->a;
2282ed1c418cSBarry Smith   ii  = a->i;
2283ed1c418cSBarry Smith 
2284ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2285ed1c418cSBarry Smith     jrow  = ii[i];
2286ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2287ed1c418cSBarry Smith     sum1  = 0.0;
2288ed1c418cSBarry Smith     sum2  = 0.0;
2289ed1c418cSBarry Smith     sum3  = 0.0;
2290ed1c418cSBarry Smith     sum4  = 0.0;
2291ed1c418cSBarry Smith     sum5  = 0.0;
2292ed1c418cSBarry Smith     sum6  = 0.0;
2293ed1c418cSBarry Smith     sum7  = 0.0;
2294ed1c418cSBarry Smith     sum8  = 0.0;
2295ed1c418cSBarry Smith     sum9  = 0.0;
2296ed1c418cSBarry Smith     sum10 = 0.0;
2297ed1c418cSBarry Smith     sum11 = 0.0;
2298ed1c418cSBarry Smith     sum12 = 0.0;
2299ed1c418cSBarry Smith     sum13 = 0.0;
2300ed1c418cSBarry Smith     sum14 = 0.0;
2301ed1c418cSBarry Smith     sum15 = 0.0;
2302ed1c418cSBarry Smith     sum16 = 0.0;
2303ed1c418cSBarry Smith     sum17 = 0.0;
2304ed1c418cSBarry Smith     sum18 = 0.0;
230526fbe8dcSKarl Rupp 
2306ed1c418cSBarry Smith     nonzerorow += (n > 0);
2307ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2308ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2309ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2310ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2311ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2312ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2313ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2314ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2315ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2316ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2317ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2318ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2319ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2320ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2321ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2322ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2323ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2324ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2325ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2326ed1c418cSBarry Smith       jrow++;
2327ed1c418cSBarry Smith     }
2328ed1c418cSBarry Smith     y[18 * i]      = sum1;
2329ed1c418cSBarry Smith     y[18 * i + 1]  = sum2;
2330ed1c418cSBarry Smith     y[18 * i + 2]  = sum3;
2331ed1c418cSBarry Smith     y[18 * i + 3]  = sum4;
2332ed1c418cSBarry Smith     y[18 * i + 4]  = sum5;
2333ed1c418cSBarry Smith     y[18 * i + 5]  = sum6;
2334ed1c418cSBarry Smith     y[18 * i + 6]  = sum7;
2335ed1c418cSBarry Smith     y[18 * i + 7]  = sum8;
2336ed1c418cSBarry Smith     y[18 * i + 8]  = sum9;
2337ed1c418cSBarry Smith     y[18 * i + 9]  = sum10;
2338ed1c418cSBarry Smith     y[18 * i + 10] = sum11;
2339ed1c418cSBarry Smith     y[18 * i + 11] = sum12;
2340ed1c418cSBarry Smith     y[18 * i + 12] = sum13;
2341ed1c418cSBarry Smith     y[18 * i + 13] = sum14;
2342ed1c418cSBarry Smith     y[18 * i + 14] = sum15;
2343ed1c418cSBarry Smith     y[18 * i + 15] = sum16;
2344ed1c418cSBarry Smith     y[18 * i + 16] = sum17;
2345ed1c418cSBarry Smith     y[18 * i + 17] = sum18;
2346ed1c418cSBarry Smith   }
2347ed1c418cSBarry Smith 
23489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
23499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
23509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2351ed1c418cSBarry Smith   PetscFunctionReturn(0);
2352ed1c418cSBarry Smith }
2353ed1c418cSBarry Smith 
23549371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2355ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2356ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2357d2840e09SBarry Smith   const PetscScalar *x, *v;
2358d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2359ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2360d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2361d2840e09SBarry Smith   PetscInt           n, i;
2362ed1c418cSBarry Smith 
2363ed1c418cSBarry Smith   PetscFunctionBegin;
23649566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
23659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
23669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2367ed1c418cSBarry Smith 
2368ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2369ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2370ed1c418cSBarry Smith     v       = a->a + a->i[i];
2371ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2372ed1c418cSBarry Smith     alpha1  = x[18 * i];
2373ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2374ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2375ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2376ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2377ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2378ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2379ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2380ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2381ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2382ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2383ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2384ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2385ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2386ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2387ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2388ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2389ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2390ed1c418cSBarry Smith     while (n-- > 0) {
2391ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2392ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2393ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2394ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2395ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2396ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2397ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2398ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2399ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2400ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2401ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2402ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2403ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2404ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2405ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2406ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2407ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2408ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
24099371c9d4SSatish Balay       idx++;
24109371c9d4SSatish Balay       v++;
2411ed1c418cSBarry Smith     }
2412ed1c418cSBarry Smith   }
24139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2416ed1c418cSBarry Smith   PetscFunctionReturn(0);
2417ed1c418cSBarry Smith }
2418ed1c418cSBarry Smith 
24199371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2420ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2421ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2422d2840e09SBarry Smith   const PetscScalar *x, *v;
2423d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2424ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2425d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2426ed1c418cSBarry Smith   PetscInt           n, i, jrow, j;
2427ed1c418cSBarry Smith 
2428ed1c418cSBarry Smith   PetscFunctionBegin;
24299566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
24309566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
24319566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2432ed1c418cSBarry Smith   idx = a->j;
2433ed1c418cSBarry Smith   v   = a->a;
2434ed1c418cSBarry Smith   ii  = a->i;
2435ed1c418cSBarry Smith 
2436ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2437ed1c418cSBarry Smith     jrow  = ii[i];
2438ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2439ed1c418cSBarry Smith     sum1  = 0.0;
2440ed1c418cSBarry Smith     sum2  = 0.0;
2441ed1c418cSBarry Smith     sum3  = 0.0;
2442ed1c418cSBarry Smith     sum4  = 0.0;
2443ed1c418cSBarry Smith     sum5  = 0.0;
2444ed1c418cSBarry Smith     sum6  = 0.0;
2445ed1c418cSBarry Smith     sum7  = 0.0;
2446ed1c418cSBarry Smith     sum8  = 0.0;
2447ed1c418cSBarry Smith     sum9  = 0.0;
2448ed1c418cSBarry Smith     sum10 = 0.0;
2449ed1c418cSBarry Smith     sum11 = 0.0;
2450ed1c418cSBarry Smith     sum12 = 0.0;
2451ed1c418cSBarry Smith     sum13 = 0.0;
2452ed1c418cSBarry Smith     sum14 = 0.0;
2453ed1c418cSBarry Smith     sum15 = 0.0;
2454ed1c418cSBarry Smith     sum16 = 0.0;
2455ed1c418cSBarry Smith     sum17 = 0.0;
2456ed1c418cSBarry Smith     sum18 = 0.0;
2457ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2458ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2459ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2460ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2461ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2462ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2463ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2464ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2465ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2466ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2467ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2468ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2469ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2470ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2471ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2472ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2473ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2474ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2475ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2476ed1c418cSBarry Smith       jrow++;
2477ed1c418cSBarry Smith     }
2478ed1c418cSBarry Smith     y[18 * i] += sum1;
2479ed1c418cSBarry Smith     y[18 * i + 1] += sum2;
2480ed1c418cSBarry Smith     y[18 * i + 2] += sum3;
2481ed1c418cSBarry Smith     y[18 * i + 3] += sum4;
2482ed1c418cSBarry Smith     y[18 * i + 4] += sum5;
2483ed1c418cSBarry Smith     y[18 * i + 5] += sum6;
2484ed1c418cSBarry Smith     y[18 * i + 6] += sum7;
2485ed1c418cSBarry Smith     y[18 * i + 7] += sum8;
2486ed1c418cSBarry Smith     y[18 * i + 8] += sum9;
2487ed1c418cSBarry Smith     y[18 * i + 9] += sum10;
2488ed1c418cSBarry Smith     y[18 * i + 10] += sum11;
2489ed1c418cSBarry Smith     y[18 * i + 11] += sum12;
2490ed1c418cSBarry Smith     y[18 * i + 12] += sum13;
2491ed1c418cSBarry Smith     y[18 * i + 13] += sum14;
2492ed1c418cSBarry Smith     y[18 * i + 14] += sum15;
2493ed1c418cSBarry Smith     y[18 * i + 15] += sum16;
2494ed1c418cSBarry Smith     y[18 * i + 16] += sum17;
2495ed1c418cSBarry Smith     y[18 * i + 17] += sum18;
2496ed1c418cSBarry Smith   }
2497ed1c418cSBarry Smith 
24989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2501ed1c418cSBarry Smith   PetscFunctionReturn(0);
2502ed1c418cSBarry Smith }
2503ed1c418cSBarry Smith 
25049371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2505ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2506ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2507d2840e09SBarry Smith   const PetscScalar *x, *v;
2508d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2509ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2510d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2511d2840e09SBarry Smith   PetscInt           n, i;
2512ed1c418cSBarry Smith 
2513ed1c418cSBarry Smith   PetscFunctionBegin;
25149566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
25159566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2517ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2518ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2519ed1c418cSBarry Smith     v       = a->a + a->i[i];
2520ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2521ed1c418cSBarry Smith     alpha1  = x[18 * i];
2522ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2523ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2524ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2525ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2526ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2527ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2528ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2529ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2530ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2531ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2532ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2533ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2534ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2535ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2536ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2537ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2538ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2539ed1c418cSBarry Smith     while (n-- > 0) {
2540ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2541ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2542ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2543ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2544ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2545ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2546ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2547ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2548ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2549ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2550ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2551ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2552ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2553ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2554ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2555ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2556ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2557ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
25589371c9d4SSatish Balay       idx++;
25599371c9d4SSatish Balay       v++;
2560ed1c418cSBarry Smith     }
2561ed1c418cSBarry Smith   }
25629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
25639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2565ed1c418cSBarry Smith   PetscFunctionReturn(0);
2566ed1c418cSBarry Smith }
2567ed1c418cSBarry Smith 
25689371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
25696861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
25706861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
25716861f193SBarry Smith   const PetscScalar *x, *v;
25726861f193SBarry Smith   PetscScalar       *y, *sums;
25736861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
25746861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
25756861f193SBarry Smith 
25766861f193SBarry Smith   PetscFunctionBegin;
25779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25789566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
25799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
25806861f193SBarry Smith   idx = a->j;
25816861f193SBarry Smith   v   = a->a;
25826861f193SBarry Smith   ii  = a->i;
25836861f193SBarry Smith 
25846861f193SBarry Smith   for (i = 0; i < m; i++) {
25856861f193SBarry Smith     jrow = ii[i];
25866861f193SBarry Smith     n    = ii[i + 1] - jrow;
25876861f193SBarry Smith     sums = y + dof * i;
25886861f193SBarry Smith     for (j = 0; j < n; j++) {
2589ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
25906861f193SBarry Smith       jrow++;
25916861f193SBarry Smith     }
25926861f193SBarry Smith   }
25936861f193SBarry Smith 
25949566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
25959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
25976861f193SBarry Smith   PetscFunctionReturn(0);
25986861f193SBarry Smith }
25996861f193SBarry Smith 
26009371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26016861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26026861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26036861f193SBarry Smith   const PetscScalar *x, *v;
26046861f193SBarry Smith   PetscScalar       *y, *sums;
26056861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26066861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
26076861f193SBarry Smith 
26086861f193SBarry Smith   PetscFunctionBegin;
26099566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26109566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26126861f193SBarry Smith   idx = a->j;
26136861f193SBarry Smith   v   = a->a;
26146861f193SBarry Smith   ii  = a->i;
26156861f193SBarry Smith 
26166861f193SBarry Smith   for (i = 0; i < m; i++) {
26176861f193SBarry Smith     jrow = ii[i];
26186861f193SBarry Smith     n    = ii[i + 1] - jrow;
26196861f193SBarry Smith     sums = y + dof * i;
26206861f193SBarry Smith     for (j = 0; j < n; j++) {
2621ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
26226861f193SBarry Smith       jrow++;
26236861f193SBarry Smith     }
26246861f193SBarry Smith   }
26256861f193SBarry Smith 
26269566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26296861f193SBarry Smith   PetscFunctionReturn(0);
26306861f193SBarry Smith }
26316861f193SBarry Smith 
26329371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
26336861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26346861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26356861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26366861f193SBarry Smith   PetscScalar       *y;
26376861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26386861f193SBarry Smith   PetscInt           n, i, k;
26396861f193SBarry Smith 
26406861f193SBarry Smith   PetscFunctionBegin;
26419566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26429566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
26439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
26446861f193SBarry Smith   for (i = 0; i < m; i++) {
26456861f193SBarry Smith     idx   = a->j + a->i[i];
26466861f193SBarry Smith     v     = a->a + a->i[i];
26476861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26486861f193SBarry Smith     alpha = x + dof * i;
26496861f193SBarry Smith     while (n-- > 0) {
2650ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
26519371c9d4SSatish Balay       idx++;
26529371c9d4SSatish Balay       v++;
26536861f193SBarry Smith     }
26546861f193SBarry Smith   }
26559566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
26586861f193SBarry Smith   PetscFunctionReturn(0);
26596861f193SBarry Smith }
26606861f193SBarry Smith 
26619371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26626861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26636861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26646861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26656861f193SBarry Smith   PetscScalar       *y;
26666861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26676861f193SBarry Smith   PetscInt           n, i, k;
26686861f193SBarry Smith 
26696861f193SBarry Smith   PetscFunctionBegin;
26709566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26719566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26736861f193SBarry Smith   for (i = 0; i < m; i++) {
26746861f193SBarry Smith     idx   = a->j + a->i[i];
26756861f193SBarry Smith     v     = a->a + a->i[i];
26766861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26776861f193SBarry Smith     alpha = x + dof * i;
26786861f193SBarry Smith     while (n-- > 0) {
2679ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
26809371c9d4SSatish Balay       idx++;
26819371c9d4SSatish Balay       v++;
26826861f193SBarry Smith     }
26836861f193SBarry Smith   }
26849566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26876861f193SBarry Smith   PetscFunctionReturn(0);
26886861f193SBarry Smith }
26896861f193SBarry Smith 
2690f4a53059SBarry Smith /*===================================================================================*/
26919371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
26924cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2693f4a53059SBarry Smith 
2694b24ad042SBarry Smith   PetscFunctionBegin;
2695f4a53059SBarry Smith   /* start the scatter */
26969566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
26979566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
26989566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
26999566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2700f4a53059SBarry Smith   PetscFunctionReturn(0);
2701f4a53059SBarry Smith }
2702f4a53059SBarry Smith 
27039371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
27044cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2705b24ad042SBarry Smith 
27064cb9d9b8SBarry Smith   PetscFunctionBegin;
27079566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27089566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
27099566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27109566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27114cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27124cb9d9b8SBarry Smith }
27134cb9d9b8SBarry Smith 
27149371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27154cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
27164cb9d9b8SBarry Smith 
2717b24ad042SBarry Smith   PetscFunctionBegin;
27184cb9d9b8SBarry Smith   /* start the scatter */
27199566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27209566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
27219566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27229566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
27234cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27244cb9d9b8SBarry Smith }
27254cb9d9b8SBarry Smith 
27269371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27274cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2728b24ad042SBarry Smith 
27294cb9d9b8SBarry Smith   PetscFunctionBegin;
27309566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27319566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
27329566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27339566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27344cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27354cb9d9b8SBarry Smith }
27364cb9d9b8SBarry Smith 
273795ddefa2SHong Zhang /* ----------------------------------------------------------------*/
27389371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) {
27394222ddf1SHong Zhang   Mat_Product *product = C->product;
27404222ddf1SHong Zhang 
27414222ddf1SHong Zhang   PetscFunctionBegin;
27424222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
27434222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
274498921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
27454222ddf1SHong Zhang   PetscFunctionReturn(0);
27464222ddf1SHong Zhang }
27474222ddf1SHong Zhang 
27489371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) {
27494222ddf1SHong Zhang   Mat_Product *product = C->product;
27504222ddf1SHong Zhang   PetscBool    flg     = PETSC_FALSE;
27514222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
27524222ddf1SHong Zhang   PetscInt     alg = 1; /* set default algorithm */
27534222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
27544222ddf1SHong Zhang   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
27554222ddf1SHong Zhang   PetscInt    nalg        = 4;
27564222ddf1SHong Zhang #else
27574222ddf1SHong Zhang   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
27584222ddf1SHong Zhang   PetscInt    nalg        = 5;
27594222ddf1SHong Zhang #endif
27604222ddf1SHong Zhang 
27614222ddf1SHong Zhang   PetscFunctionBegin;
276208401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
27634222ddf1SHong Zhang 
27644222ddf1SHong Zhang   /* PtAP */
27654222ddf1SHong Zhang   /* Check matrix local sizes */
27669371c9d4SSatish Balay   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27679371c9d4SSatish Balay              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
27689371c9d4SSatish Balay   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27699371c9d4SSatish Balay              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
27704222ddf1SHong Zhang 
27714222ddf1SHong Zhang   /* Set the default algorithm */
27729566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
277348a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
27744222ddf1SHong Zhang 
27754222ddf1SHong Zhang   /* Get runtime option */
2776d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
27779566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
277848a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2779d0609cedSBarry Smith   PetscOptionsEnd();
27804222ddf1SHong Zhang 
27819566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
27824222ddf1SHong Zhang   if (flg) {
27834222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
27844222ddf1SHong Zhang     PetscFunctionReturn(0);
27854222ddf1SHong Zhang   }
27864222ddf1SHong Zhang 
27879566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
27884222ddf1SHong Zhang   if (flg) {
27894222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
27904222ddf1SHong Zhang     PetscFunctionReturn(0);
27914222ddf1SHong Zhang   }
27924222ddf1SHong Zhang 
27934222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
27949566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
27959566063dSJacob Faibussowitsch   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
27969566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
27974222ddf1SHong Zhang   PetscFunctionReturn(0);
27984222ddf1SHong Zhang }
27994222ddf1SHong Zhang 
28004222ddf1SHong Zhang /* ----------------------------------------------------------------*/
28019371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) {
28020298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
28037ba1a0bfSKris Buschelman   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
28047ba1a0bfSKris Buschelman   Mat                P  = pp->AIJ;
28057ba1a0bfSKris Buschelman   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2806d2840e09SBarry Smith   PetscInt          *pti, *ptj, *ptJ;
28077ba1a0bfSKris Buschelman   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2808d2840e09SBarry Smith   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2809d2840e09SBarry Smith   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
28107ba1a0bfSKris Buschelman   MatScalar         *ca;
2811d2840e09SBarry Smith   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
28127ba1a0bfSKris Buschelman 
28137ba1a0bfSKris Buschelman   PetscFunctionBegin;
28147ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28159566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
28167ba1a0bfSKris Buschelman 
28177ba1a0bfSKris Buschelman   cn = pn * ppdof;
28187ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28197ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn + 1, &ci));
28217ba1a0bfSKris Buschelman   ci[0] = 0;
28227ba1a0bfSKris Buschelman 
28237ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
28259566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow, an));
28269566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow, cn));
28277ba1a0bfSKris Buschelman 
28287ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28297ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28307ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28319566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
28327ba1a0bfSKris Buschelman   current_space = free_space;
28337ba1a0bfSKris Buschelman 
28347ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
28357ba1a0bfSKris Buschelman   for (i = 0; i < pn; i++) {
28367ba1a0bfSKris Buschelman     ptnzi = pti[i + 1] - pti[i];
28377ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
28387ba1a0bfSKris Buschelman     for (dof = 0; dof < ppdof; dof++) {
28397ba1a0bfSKris Buschelman       ptanzi = 0;
28407ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
28417ba1a0bfSKris Buschelman       for (j = 0; j < ptnzi; j++) {
28427ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
28437ba1a0bfSKris Buschelman         arow = ptJ[j] * ppdof + dof;
28447ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
28457ba1a0bfSKris Buschelman         anzj = ai[arow + 1] - ai[arow];
28467ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
28477ba1a0bfSKris Buschelman         for (k = 0; k < anzj; k++) {
28487ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
28497ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
28507ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
28517ba1a0bfSKris Buschelman           }
28527ba1a0bfSKris Buschelman         }
28537ba1a0bfSKris Buschelman       }
28547ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
28557ba1a0bfSKris Buschelman       ptaj = ptasparserow;
28567ba1a0bfSKris Buschelman       cnzi = 0;
28577ba1a0bfSKris Buschelman       for (j = 0; j < ptanzi; j++) {
28587ba1a0bfSKris Buschelman         /* Get offset within block of P */
28597ba1a0bfSKris Buschelman         pshift = *ptaj % ppdof;
28607ba1a0bfSKris Buschelman         /* Get block row of P */
28617ba1a0bfSKris Buschelman         prow   = (*ptaj++) / ppdof; /* integer division */
28627ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
28637ba1a0bfSKris Buschelman         pnzj   = pi[prow + 1] - pi[prow];
28647ba1a0bfSKris Buschelman         pjj    = pj + pi[prow];
28657ba1a0bfSKris Buschelman         for (k = 0; k < pnzj; k++) {
28667ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
28677ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
28687ba1a0bfSKris Buschelman           if (!denserow[pjj[k] * ppdof + pshift]) {
28697ba1a0bfSKris Buschelman             denserow[pjj[k] * ppdof + pshift] = -1;
28707ba1a0bfSKris Buschelman             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
28717ba1a0bfSKris Buschelman           }
28727ba1a0bfSKris Buschelman         }
28737ba1a0bfSKris Buschelman       }
28747ba1a0bfSKris Buschelman 
28757ba1a0bfSKris Buschelman       /* sort sparserow */
28769566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi, sparserow));
28777ba1a0bfSKris Buschelman 
28787ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
28797ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
288048a46eb9SPierre Jolivet       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
28817ba1a0bfSKris Buschelman 
28827ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
28839566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
288426fbe8dcSKarl Rupp 
28857ba1a0bfSKris Buschelman       current_space->array += cnzi;
28867ba1a0bfSKris Buschelman       current_space->local_used += cnzi;
28877ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
28887ba1a0bfSKris Buschelman 
288926fbe8dcSKarl Rupp       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
289026fbe8dcSKarl Rupp       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
289126fbe8dcSKarl Rupp 
28927ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
28937ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
28947ba1a0bfSKris Buschelman       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
28957ba1a0bfSKris Buschelman     }
28967ba1a0bfSKris Buschelman   }
28977ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
28987ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
28997ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
29019566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
29029566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
29037ba1a0bfSKris Buschelman 
29047ba1a0bfSKris Buschelman   /* Allocate space for ca */
29059566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
29067ba1a0bfSKris Buschelman 
29077ba1a0bfSKris Buschelman   /* put together the new matrix */
29089566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
29099566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C, pp->dof));
29107ba1a0bfSKris Buschelman 
29117ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29127ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29134222ddf1SHong Zhang   c          = (Mat_SeqAIJ *)(C->data);
2914e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2915e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29167ba1a0bfSKris Buschelman   c->nonew   = 0;
291726fbe8dcSKarl Rupp 
29184222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29194222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29207ba1a0bfSKris Buschelman 
29217ba1a0bfSKris Buschelman   /* Clean up. */
29229566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
29237ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29247ba1a0bfSKris Buschelman }
29257ba1a0bfSKris Buschelman 
29269371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) {
29277ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29287ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
29297ba1a0bfSKris Buschelman   Mat              P  = pp->AIJ;
29307ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
29317ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
29327ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2933a2ea699eSBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2934d2840e09SBarry Smith   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2935d2840e09SBarry Smith   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2936d2840e09SBarry Smith   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2937a2ea699eSBarry Smith   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2938d2840e09SBarry Smith   MatScalar       *ca = c->a, *caj, *apa;
29397ba1a0bfSKris Buschelman 
29407ba1a0bfSKris Buschelman   PetscFunctionBegin;
29417ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
29429566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
29437ba1a0bfSKris Buschelman 
29447ba1a0bfSKris Buschelman   /* Clear old values in C */
29459566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca, ci[cm]));
29467ba1a0bfSKris Buschelman 
29477ba1a0bfSKris Buschelman   for (i = 0; i < am; i++) {
29487ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
29497ba1a0bfSKris Buschelman     anzi  = ai[i + 1] - ai[i];
29507ba1a0bfSKris Buschelman     apnzj = 0;
29517ba1a0bfSKris Buschelman     for (j = 0; j < anzi; j++) {
29527ba1a0bfSKris Buschelman       /* Get offset within block of P */
29537ba1a0bfSKris Buschelman       pshift = *aj % ppdof;
29547ba1a0bfSKris Buschelman       /* Get block row of P */
29557ba1a0bfSKris Buschelman       prow   = *aj++ / ppdof; /* integer division */
29567ba1a0bfSKris Buschelman       pnzj   = pi[prow + 1] - pi[prow];
29577ba1a0bfSKris Buschelman       pjj    = pj + pi[prow];
29587ba1a0bfSKris Buschelman       paj    = pa + pi[prow];
29597ba1a0bfSKris Buschelman       for (k = 0; k < pnzj; k++) {
29607ba1a0bfSKris Buschelman         poffset = pjj[k] * ppdof + pshift;
29617ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
29627ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
29637ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
29647ba1a0bfSKris Buschelman         }
29657ba1a0bfSKris Buschelman         apa[poffset] += (*aa) * paj[k];
29667ba1a0bfSKris Buschelman       }
29679566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * pnzj));
29687ba1a0bfSKris Buschelman       aa++;
29697ba1a0bfSKris Buschelman     }
29707ba1a0bfSKris Buschelman 
29717ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
29727ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
29739566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj, apj));
29747ba1a0bfSKris Buschelman 
29757ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
29767ba1a0bfSKris Buschelman     prow    = i / ppdof; /* integer division */
29777ba1a0bfSKris Buschelman     pshift  = i % ppdof;
29787ba1a0bfSKris Buschelman     poffset = pi[prow];
29797ba1a0bfSKris Buschelman     pnzi    = pi[prow + 1] - poffset;
29807ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
29817ba1a0bfSKris Buschelman     pJ      = pj + poffset;
29827ba1a0bfSKris Buschelman     pA      = pa + poffset;
29837ba1a0bfSKris Buschelman     for (j = 0; j < pnzi; j++) {
29847ba1a0bfSKris Buschelman       crow = (*pJ) * ppdof + pshift;
29857ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
29867ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
29877ba1a0bfSKris Buschelman       pJ++;
29887ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
29897ba1a0bfSKris Buschelman       for (k = 0, nextap = 0; nextap < apnzj; k++) {
299026fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
29917ba1a0bfSKris Buschelman       }
29929566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * apnzj));
29937ba1a0bfSKris Buschelman       pA++;
29947ba1a0bfSKris Buschelman     }
29957ba1a0bfSKris Buschelman 
29967ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
29977ba1a0bfSKris Buschelman     for (j = 0; j < apnzj; j++) {
29987ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
29997ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30007ba1a0bfSKris Buschelman     }
30017ba1a0bfSKris Buschelman   }
30027ba1a0bfSKris Buschelman 
30037ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
30059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
30069566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa, apj, apjdense));
300795ddefa2SHong Zhang   PetscFunctionReturn(0);
300895ddefa2SHong Zhang }
30097ba1a0bfSKris Buschelman 
30109371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) {
30114222ddf1SHong Zhang   Mat_Product *product = C->product;
30124222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30132121bac1SHong Zhang 
30142121bac1SHong Zhang   PetscFunctionBegin;
30159566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
30162121bac1SHong Zhang   PetscFunctionReturn(0);
30172121bac1SHong Zhang }
30182121bac1SHong Zhang 
30199371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) {
302095ddefa2SHong Zhang   PetscFunctionBegin;
30213e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
302295ddefa2SHong Zhang }
302395ddefa2SHong Zhang 
30249371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) {
302595ddefa2SHong Zhang   PetscFunctionBegin;
30263e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
30277ba1a0bfSKris Buschelman }
30285c65b9ecSFande Kong 
3029bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3030bc8e477aSFande Kong 
30319371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) {
3032bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3033bc8e477aSFande Kong 
3034bc8e477aSFande Kong   PetscFunctionBegin;
303534bcad68SFande Kong 
30369566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3037bc8e477aSFande Kong   PetscFunctionReturn(0);
3038bc8e477aSFande Kong }
3039bc8e477aSFande Kong 
30404222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3041bc8e477aSFande Kong 
30429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) {
3043bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3044bc8e477aSFande Kong 
3045bc8e477aSFande Kong   PetscFunctionBegin;
30469566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
30474222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3048bc8e477aSFande Kong   PetscFunctionReturn(0);
3049bc8e477aSFande Kong }
3050bc8e477aSFande Kong 
3051bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3052bc8e477aSFande Kong 
30539371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) {
3054bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3055bc8e477aSFande Kong 
3056bc8e477aSFande Kong   PetscFunctionBegin;
305734bcad68SFande Kong 
30589566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3059bc8e477aSFande Kong   PetscFunctionReturn(0);
3060bc8e477aSFande Kong }
3061bc8e477aSFande Kong 
30624222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3063bc8e477aSFande Kong 
30649371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) {
3065bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3066bc8e477aSFande Kong 
3067bc8e477aSFande Kong   PetscFunctionBegin;
306834bcad68SFande Kong 
30699566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
30704222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3071bc8e477aSFande Kong   PetscFunctionReturn(0);
3072bc8e477aSFande Kong }
3073bc8e477aSFande Kong 
30749371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) {
30754222ddf1SHong Zhang   Mat_Product *product = C->product;
30764222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30774222ddf1SHong Zhang   PetscBool    flg;
3078bc8e477aSFande Kong 
3079bc8e477aSFande Kong   PetscFunctionBegin;
30809566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
30814222ddf1SHong Zhang   if (flg) {
30829566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
30834222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
30844222ddf1SHong Zhang     PetscFunctionReturn(0);
3085bc8e477aSFande Kong   }
308634bcad68SFande Kong 
30879566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
30884222ddf1SHong Zhang   if (flg) {
30899566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
30904222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
30914222ddf1SHong Zhang     PetscFunctionReturn(0);
30924222ddf1SHong Zhang   }
309334bcad68SFande Kong 
30944222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3095bc8e477aSFande Kong }
3096bc8e477aSFande Kong 
30979371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
30989c4fc4c7SBarry Smith   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3099ceb03754SKris Buschelman   Mat          a   = b->AIJ, B;
31009c4fc4c7SBarry Smith   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
31010fd73130SBarry Smith   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3102ba8c8a56SBarry Smith   PetscInt    *cols;
3103ba8c8a56SBarry Smith   PetscScalar *vals;
31049c4fc4c7SBarry Smith 
31059c4fc4c7SBarry Smith   PetscFunctionBegin;
31069566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a, &m, &n));
31079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof * m, &ilen));
31089c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31099c4fc4c7SBarry Smith     nmax = PetscMax(nmax, aij->ilen[i]);
311026fbe8dcSKarl Rupp     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
31119c4fc4c7SBarry Smith   }
31129566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
31139566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
31149566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31159566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
31169566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax, &icols));
31189c4fc4c7SBarry Smith   ii = 0;
31199c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31209566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31210fd73130SBarry Smith     for (j = 0; j < dof; j++) {
312226fbe8dcSKarl Rupp       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
31239566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
31249c4fc4c7SBarry Smith       ii++;
31259c4fc4c7SBarry Smith     }
31269566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31279c4fc4c7SBarry Smith   }
31289566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
31299566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
31309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3131ceb03754SKris Buschelman 
3132511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
31339566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
3134c3d102feSKris Buschelman   } else {
3135ceb03754SKris Buschelman     *newmat = B;
3136c3d102feSKris Buschelman   }
31379c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31389c4fc4c7SBarry Smith }
31399c4fc4c7SBarry Smith 
3140c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3141be1d678aSKris Buschelman 
31429371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
31430fd73130SBarry Smith   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3144ceb03754SKris Buschelman   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
31450fd73130SBarry Smith   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
31460fd73130SBarry Smith   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
31470fd73130SBarry Smith   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
31480fd73130SBarry Smith   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
31490298fd71SBarry Smith   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
31500298fd71SBarry Smith   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
31510fd73130SBarry Smith   PetscInt     rstart, cstart, *garray, ii, k;
31520fd73130SBarry Smith   PetscScalar *vals, *ovals;
31530fd73130SBarry Smith 
31540fd73130SBarry Smith   PetscFunctionBegin;
31559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3156d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31570fd73130SBarry Smith     nmax  = PetscMax(nmax, AIJ->ilen[i]);
31580fd73130SBarry Smith     onmax = PetscMax(onmax, OAIJ->ilen[i]);
31590fd73130SBarry Smith     for (j = 0; j < dof; j++) {
31600fd73130SBarry Smith       dnz[dof * i + j] = AIJ->ilen[i];
31610fd73130SBarry Smith       onz[dof * i + j] = OAIJ->ilen[i];
31620fd73130SBarry Smith     }
31630fd73130SBarry Smith   }
31649566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
31659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
31669566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31679566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
31689566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, dof));
31699566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz, onz));
31700fd73130SBarry Smith 
31719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3172d0f46423SBarry Smith   rstart = dof * maij->A->rmap->rstart;
3173d0f46423SBarry Smith   cstart = dof * maij->A->cmap->rstart;
31740fd73130SBarry Smith   garray = mpiaij->garray;
31750fd73130SBarry Smith 
31760fd73130SBarry Smith   ii = rstart;
3177d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31789566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
31799566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
31800fd73130SBarry Smith     for (j = 0; j < dof; j++) {
3181ad540459SPierre Jolivet       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3182ad540459SPierre Jolivet       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
31839566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
31849566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
31850fd73130SBarry Smith       ii++;
31860fd73130SBarry Smith     }
31879566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
31889566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
31890fd73130SBarry Smith   }
31909566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols, oicols));
31910fd73130SBarry Smith 
31929566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
31939566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3194ceb03754SKris Buschelman 
3195511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
31967adad957SLisandro Dalcin     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
31977adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
319826fbe8dcSKarl Rupp 
31999566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
320026fbe8dcSKarl Rupp 
32017adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3202c3d102feSKris Buschelman   } else {
3203ceb03754SKris Buschelman     *newmat = B;
3204c3d102feSKris Buschelman   }
32050fd73130SBarry Smith   PetscFunctionReturn(0);
32060fd73130SBarry Smith }
32070fd73130SBarry Smith 
32089371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
32099e516c8fSBarry Smith   Mat A;
32109e516c8fSBarry Smith 
32119e516c8fSBarry Smith   PetscFunctionBegin;
32129566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32139566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
32149566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
32159e516c8fSBarry Smith   PetscFunctionReturn(0);
32169e516c8fSBarry Smith }
32170fd73130SBarry Smith 
32189371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) {
3219ec626240SStefano Zampini   Mat A;
3220ec626240SStefano Zampini 
3221ec626240SStefano Zampini   PetscFunctionBegin;
32229566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32239566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
32249566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3225ec626240SStefano Zampini   PetscFunctionReturn(0);
3226ec626240SStefano Zampini }
3227ec626240SStefano Zampini 
3228bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3229480dffcfSJed Brown /*@
32300bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32310bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
3232*11a5261eSBarry Smith   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
3233*11a5261eSBarry Smith   and `MATMPIAIJ` for distributed matrices.
32340bad9183SKris Buschelman 
3235ff585edeSJed Brown   Collective
3236ff585edeSJed Brown 
3237ff585edeSJed Brown   Input Parameters:
3238*11a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks
3239ff585edeSJed Brown - dof - the block size (number of components per node)
3240ff585edeSJed Brown 
3241ff585edeSJed Brown   Output Parameter:
3242*11a5261eSBarry Smith . maij - the new `MATMAIJ` matrix
3243ff585edeSJed Brown 
32440bad9183SKris Buschelman   Operations provided:
324567b8a455SSatish Balay .vb
3246*11a5261eSBarry Smith     MatMult()
3247*11a5261eSBarry Smith     MatMultTranspose()
3248*11a5261eSBarry Smith     MatMultAdd()
3249*11a5261eSBarry Smith     MatMultTransposeAdd()
3250*11a5261eSBarry Smith     MatView()
325167b8a455SSatish Balay .ve
32520bad9183SKris Buschelman 
32530bad9183SKris Buschelman   Level: advanced
32540bad9183SKris Buschelman 
3255*11a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3256ff585edeSJed Brown @*/
32579371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) {
3258b24ad042SBarry Smith   PetscInt  n;
325982b1193eSBarry Smith   Mat       B;
326090f67b22SBarry Smith   PetscBool flg;
3261fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3262fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3263fdc842d1SBarry Smith   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3264fdc842d1SBarry Smith #endif
326582b1193eSBarry Smith 
326682b1193eSBarry Smith   PetscFunctionBegin;
3267fdc842d1SBarry Smith   dof = PetscAbs(dof);
32689566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3269d72c5c08SBarry Smith 
327026fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
327126fbe8dcSKarl Rupp   else {
32729566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3273c248e2fdSStefano Zampini     /* propagate vec type */
32749566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B, A->defaultvectype));
32759566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
32769566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
32779566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
32789566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
32799566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
328026fbe8dcSKarl Rupp 
3281cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3282d72c5c08SBarry Smith 
328390f67b22SBarry Smith     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
328490f67b22SBarry Smith     if (flg) {
3285feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3286feb9fe23SJed Brown 
32879566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATSEQMAIJ));
328826fbe8dcSKarl Rupp 
32890298fd71SBarry Smith       B->ops->setup   = NULL;
32904cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
32910fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
32924222ddf1SHong Zhang 
3293feb9fe23SJed Brown       b      = (Mat_SeqMAIJ *)B->data;
3294b9b97703SBarry Smith       b->dof = dof;
32954cb9d9b8SBarry Smith       b->AIJ = A;
329626fbe8dcSKarl Rupp 
3297d72c5c08SBarry Smith       if (dof == 2) {
3298cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3299cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3300cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3301cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3302bcc973b7SBarry Smith       } else if (dof == 3) {
3303bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3304bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3305bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3306bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3307bcc973b7SBarry Smith       } else if (dof == 4) {
3308bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3309bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3310bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3311bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3312f9fae5adSBarry Smith       } else if (dof == 5) {
3313f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3314f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3315f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3316f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33176cd98798SBarry Smith       } else if (dof == 6) {
33186cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33196cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33206cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33216cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3322ed8eea03SBarry Smith       } else if (dof == 7) {
3323ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3324ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3325ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3326ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
332766ed3db0SBarry Smith       } else if (dof == 8) {
332866ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
332966ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
333066ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
333166ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33322b692628SMatthew Knepley       } else if (dof == 9) {
33332b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33342b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33352b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33362b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3337b9cda22cSBarry Smith       } else if (dof == 10) {
3338b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3339b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3340b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3341b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3342dbdd7285SBarry Smith       } else if (dof == 11) {
3343dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3344dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3345dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3346dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33472f7816d4SBarry Smith       } else if (dof == 16) {
33482f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33492f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33502f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33512f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3352ed1c418cSBarry Smith       } else if (dof == 18) {
3353ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3354ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3355ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3356ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
335782b1193eSBarry Smith       } else {
33586861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33596861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33606861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33616861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
336282b1193eSBarry Smith       }
3363fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
33649566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3365fdc842d1SBarry Smith #endif
33669566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
33679566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3368f4a53059SBarry Smith     } else {
3369f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3370feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3371f4a53059SBarry Smith       IS           from, to;
3372f4a53059SBarry Smith       Vec          gvec;
3373f4a53059SBarry Smith 
33749566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATMPIMAIJ));
337526fbe8dcSKarl Rupp 
33760298fd71SBarry Smith       B->ops->setup   = NULL;
3377d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
33780fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
337926fbe8dcSKarl Rupp 
3380b9b97703SBarry Smith       b      = (Mat_MPIMAIJ *)B->data;
3381b9b97703SBarry Smith       b->dof = dof;
3382b9b97703SBarry Smith       b->A   = A;
338326fbe8dcSKarl Rupp 
33849566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
33859566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
33864cb9d9b8SBarry Smith 
33879566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec, &n));
33889566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
33899566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
33909566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w, dof));
33919566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w, VECSEQ));
3392f4a53059SBarry Smith 
3393f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
33949566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
33959566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3396f4a53059SBarry Smith 
3397f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
33989566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3399f4a53059SBarry Smith 
3400f4a53059SBarry Smith       /* generate the scatter context */
34019566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3402f4a53059SBarry Smith 
34039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34059566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3406f4a53059SBarry Smith 
3407f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34084cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34094cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34104cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
341126fbe8dcSKarl Rupp 
3412fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34139566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3414fdc842d1SBarry Smith #endif
34159566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
34169566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3417f4a53059SBarry Smith     }
34187dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3419ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
34209566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3421fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3422fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3423fdc842d1SBarry Smith     {
3424fdc842d1SBarry Smith       PetscBool flg;
3425fdc842d1SBarry Smith       if (convert) {
34269566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
342748a46eb9SPierre Jolivet         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3428fdc842d1SBarry Smith       }
3429fdc842d1SBarry Smith     }
3430fdc842d1SBarry Smith #endif
3431cd3bbe55SBarry Smith     *maij = B;
3432d72c5c08SBarry Smith   }
343382b1193eSBarry Smith   PetscFunctionReturn(0);
343482b1193eSBarry Smith }
3435