xref: /petsc/src/mat/impls/maij/maij.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1be1d678aSKris Buschelman 
2c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
482b1193eSBarry Smith 
5b350b9acSSatish Balay /*@
611a5261eSBarry Smith    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7ff585edeSJed Brown 
811a5261eSBarry Smith    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9ff585edeSJed Brown 
10ff585edeSJed Brown    Input Parameter:
1111a5261eSBarry Smith .  A - the `MATMAIJ` matrix
12ff585edeSJed Brown 
13ff585edeSJed Brown    Output Parameter:
1411a5261eSBarry Smith .  B - the `MATAIJ` matrix
15ff585edeSJed Brown 
16ff585edeSJed Brown    Level: advanced
17ff585edeSJed Brown 
1811a5261eSBarry Smith    Note:
1911a5261eSBarry Smith     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20ff585edeSJed Brown 
2111a5261eSBarry Smith .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22ff585edeSJed Brown @*/
23*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24*d71ae5a4SJacob Faibussowitsch {
25ace3abfcSBarry Smith   PetscBool ismpimaij, isseqmaij;
26b9b97703SBarry Smith 
27b9b97703SBarry Smith   PetscFunctionBegin;
289566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
30b9b97703SBarry Smith   if (ismpimaij) {
31b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32b9b97703SBarry Smith 
33b9b97703SBarry Smith     *B = b->A;
34b9b97703SBarry Smith   } else if (isseqmaij) {
35b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36b9b97703SBarry Smith 
37b9b97703SBarry Smith     *B = b->AIJ;
38b9b97703SBarry Smith   } else {
39b9b97703SBarry Smith     *B = A;
40b9b97703SBarry Smith   }
41b9b97703SBarry Smith   PetscFunctionReturn(0);
42b9b97703SBarry Smith }
43b9b97703SBarry Smith 
44480dffcfSJed Brown /*@
4511a5261eSBarry Smith    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46ff585edeSJed Brown 
473f9fe445SBarry Smith    Logically Collective
48ff585edeSJed Brown 
49d8d19677SJose E. Roman    Input Parameters:
5011a5261eSBarry Smith +  A - the `MATMAIJ` matrix
51ff585edeSJed Brown -  dof - the block size for the new matrix
52ff585edeSJed Brown 
53ff585edeSJed Brown    Output Parameter:
5411a5261eSBarry Smith .  B - the new `MATMAIJ` matrix
55ff585edeSJed Brown 
56ff585edeSJed Brown    Level: advanced
57ff585edeSJed Brown 
5811a5261eSBarry Smith .seealso: `MATMAIJ`, `MatCreateMAIJ()`
59ff585edeSJed Brown @*/
60*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
61*d71ae5a4SJacob Faibussowitsch {
620298fd71SBarry Smith   Mat Aij = NULL;
63b9b97703SBarry Smith 
64b9b97703SBarry Smith   PetscFunctionBegin;
65c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A, dof, 2);
669566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A, &Aij));
679566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij, dof, B));
68b9b97703SBarry Smith   PetscFunctionReturn(0);
69b9b97703SBarry Smith }
70b9b97703SBarry Smith 
71*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
72*d71ae5a4SJacob Faibussowitsch {
734cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
7482b1193eSBarry Smith 
7582b1193eSBarry Smith   PetscFunctionBegin;
769566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
779566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
782e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
809566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
814cb9d9b8SBarry Smith   PetscFunctionReturn(0);
824cb9d9b8SBarry Smith }
834cb9d9b8SBarry Smith 
84*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MAIJ(Mat A)
85*d71ae5a4SJacob Faibussowitsch {
86356c569eSBarry Smith   PetscFunctionBegin;
87ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
88356c569eSBarry Smith }
89356c569eSBarry Smith 
90*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
91*d71ae5a4SJacob Faibussowitsch {
920fd73130SBarry Smith   Mat B;
930fd73130SBarry Smith 
940fd73130SBarry Smith   PetscFunctionBegin;
959566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
969566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
979566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
980fd73130SBarry Smith   PetscFunctionReturn(0);
990fd73130SBarry Smith }
1000fd73130SBarry Smith 
101*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102*d71ae5a4SJacob Faibussowitsch {
1030fd73130SBarry Smith   Mat B;
1040fd73130SBarry Smith 
1050fd73130SBarry Smith   PetscFunctionBegin;
1069566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1079566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1089566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1090fd73130SBarry Smith   PetscFunctionReturn(0);
1100fd73130SBarry Smith }
1110fd73130SBarry Smith 
112*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113*d71ae5a4SJacob Faibussowitsch {
1144cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1154cb9d9b8SBarry Smith 
1164cb9d9b8SBarry Smith   PetscFunctionBegin;
1179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1189566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1199566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1209566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1219566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1229566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1232e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1269566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127b9b97703SBarry Smith   PetscFunctionReturn(0);
128b9b97703SBarry Smith }
129b9b97703SBarry Smith 
1300bad9183SKris Buschelman /*MC
131fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1320bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
13311a5261eSBarry Smith   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
1340bad9183SKris Buschelman 
1350bad9183SKris Buschelman   Operations provided:
13667b8a455SSatish Balay .vb
13711a5261eSBarry Smith     MatMult()
13811a5261eSBarry Smith     MatMultTranspose()
13911a5261eSBarry Smith     MatMultAdd()
14011a5261eSBarry Smith     MatMultTransposeAdd()
14167b8a455SSatish Balay .ve
1420bad9183SKris Buschelman 
1430bad9183SKris Buschelman   Level: advanced
1440bad9183SKris Buschelman 
14511a5261eSBarry Smith .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1460bad9183SKris Buschelman M*/
1470bad9183SKris Buschelman 
148*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149*d71ae5a4SJacob Faibussowitsch {
1504cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
15164b52464SHong Zhang   PetscMPIInt  size;
15282b1193eSBarry Smith 
15382b1193eSBarry Smith   PetscFunctionBegin;
1544dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
155b0a32e0cSBarry Smith   A->data = (void *)b;
15626fbe8dcSKarl Rupp 
1579566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
15826fbe8dcSKarl Rupp 
159356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
160f4a53059SBarry Smith 
161f4259b30SLisandro Dalcin   b->AIJ  = NULL;
162cd3bbe55SBarry Smith   b->dof  = 0;
163f4259b30SLisandro Dalcin   b->OAIJ = NULL;
164f4259b30SLisandro Dalcin   b->ctx  = NULL;
165f4259b30SLisandro Dalcin   b->w    = NULL;
1669566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
16764b52464SHong Zhang   if (size == 1) {
1689566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
16964b52464SHong Zhang   } else {
1709566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
17164b52464SHong Zhang   }
17232e7c8b0SBarry Smith   A->preallocated = PETSC_TRUE;
17332e7c8b0SBarry Smith   A->assembled    = PETSC_TRUE;
17482b1193eSBarry Smith   PetscFunctionReturn(0);
17582b1193eSBarry Smith }
17682b1193eSBarry Smith 
177cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
178*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
179*d71ae5a4SJacob Faibussowitsch {
1804cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
181bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
182d2840e09SBarry Smith   const PetscScalar *x, *v;
183d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
184d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
185d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
18682b1193eSBarry Smith 
187bcc973b7SBarry Smith   PetscFunctionBegin;
1889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
190bcc973b7SBarry Smith   idx = a->j;
191bcc973b7SBarry Smith   v   = a->a;
192bcc973b7SBarry Smith   ii  = a->i;
193bcc973b7SBarry Smith 
194bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
195bcc973b7SBarry Smith     jrow = ii[i];
196bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
197bcc973b7SBarry Smith     sum1 = 0.0;
198bcc973b7SBarry Smith     sum2 = 0.0;
19926fbe8dcSKarl Rupp 
200b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
201bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
202bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
203bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
204bcc973b7SBarry Smith       jrow++;
205bcc973b7SBarry Smith     }
206bcc973b7SBarry Smith     y[2 * i]     = sum1;
207bcc973b7SBarry Smith     y[2 * i + 1] = sum2;
208bcc973b7SBarry Smith   }
209bcc973b7SBarry Smith 
2109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
2119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21382b1193eSBarry Smith   PetscFunctionReturn(0);
21482b1193eSBarry Smith }
215bcc973b7SBarry Smith 
216*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
217*d71ae5a4SJacob Faibussowitsch {
2184cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
219bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
220d2840e09SBarry Smith   const PetscScalar *x, *v;
221d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
222d2840e09SBarry Smith   PetscInt           n, i;
223d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
22482b1193eSBarry Smith 
225bcc973b7SBarry Smith   PetscFunctionBegin;
2269566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
2279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2289566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
229bfec09a0SHong Zhang 
230bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
231bfec09a0SHong Zhang     idx    = a->j + a->i[i];
232bfec09a0SHong Zhang     v      = a->a + a->i[i];
233bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
234bcc973b7SBarry Smith     alpha1 = x[2 * i];
235bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
23626fbe8dcSKarl Rupp     while (n-- > 0) {
23726fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
23826fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2399371c9d4SSatish Balay       idx++;
2409371c9d4SSatish Balay       v++;
24126fbe8dcSKarl Rupp     }
242bcc973b7SBarry Smith   }
2439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
24682b1193eSBarry Smith   PetscFunctionReturn(0);
24782b1193eSBarry Smith }
248bcc973b7SBarry Smith 
249*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
250*d71ae5a4SJacob Faibussowitsch {
2514cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
252bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
253d2840e09SBarry Smith   const PetscScalar *x, *v;
254d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
255b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
256d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
25782b1193eSBarry Smith 
258bcc973b7SBarry Smith   PetscFunctionBegin;
2599566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2619566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
262bcc973b7SBarry Smith   idx = a->j;
263bcc973b7SBarry Smith   v   = a->a;
264bcc973b7SBarry Smith   ii  = a->i;
265bcc973b7SBarry Smith 
266bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
267bcc973b7SBarry Smith     jrow = ii[i];
268bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
269bcc973b7SBarry Smith     sum1 = 0.0;
270bcc973b7SBarry Smith     sum2 = 0.0;
271bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
272bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
273bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
274bcc973b7SBarry Smith       jrow++;
275bcc973b7SBarry Smith     }
276bcc973b7SBarry Smith     y[2 * i] += sum1;
277bcc973b7SBarry Smith     y[2 * i + 1] += sum2;
278bcc973b7SBarry Smith   }
279bcc973b7SBarry Smith 
2809566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2829566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
283bcc973b7SBarry Smith   PetscFunctionReturn(0);
28482b1193eSBarry Smith }
285*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
286*d71ae5a4SJacob Faibussowitsch {
2874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
288bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
289d2840e09SBarry Smith   const PetscScalar *x, *v;
290d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
291d2840e09SBarry Smith   PetscInt           n, i;
292d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
29382b1193eSBarry Smith 
294bcc973b7SBarry Smith   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
298bfec09a0SHong Zhang 
299bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
300bfec09a0SHong Zhang     idx    = a->j + a->i[i];
301bfec09a0SHong Zhang     v      = a->a + a->i[i];
302bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
303bcc973b7SBarry Smith     alpha1 = x[2 * i];
304bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
30526fbe8dcSKarl Rupp     while (n-- > 0) {
30626fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
30726fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
3089371c9d4SSatish Balay       idx++;
3099371c9d4SSatish Balay       v++;
31026fbe8dcSKarl Rupp     }
311bcc973b7SBarry Smith   }
3129566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
3139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
315bcc973b7SBarry Smith   PetscFunctionReturn(0);
31682b1193eSBarry Smith }
317cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
318*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
319*d71ae5a4SJacob Faibussowitsch {
3204cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
321bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
322d2840e09SBarry Smith   const PetscScalar *x, *v;
323d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
324d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
325d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
32682b1193eSBarry Smith 
327bcc973b7SBarry Smith   PetscFunctionBegin;
3289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
330bcc973b7SBarry Smith   idx = a->j;
331bcc973b7SBarry Smith   v   = a->a;
332bcc973b7SBarry Smith   ii  = a->i;
333bcc973b7SBarry Smith 
334bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
335bcc973b7SBarry Smith     jrow = ii[i];
336bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
337bcc973b7SBarry Smith     sum1 = 0.0;
338bcc973b7SBarry Smith     sum2 = 0.0;
339bcc973b7SBarry Smith     sum3 = 0.0;
34026fbe8dcSKarl Rupp 
341b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
342bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
343bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
344bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
345bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
346bcc973b7SBarry Smith       jrow++;
347bcc973b7SBarry Smith     }
348bcc973b7SBarry Smith     y[3 * i]     = sum1;
349bcc973b7SBarry Smith     y[3 * i + 1] = sum2;
350bcc973b7SBarry Smith     y[3 * i + 2] = sum3;
351bcc973b7SBarry Smith   }
352bcc973b7SBarry Smith 
3539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
3549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
356bcc973b7SBarry Smith   PetscFunctionReturn(0);
357bcc973b7SBarry Smith }
358bcc973b7SBarry Smith 
359*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
360*d71ae5a4SJacob Faibussowitsch {
3614cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
362bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
363d2840e09SBarry Smith   const PetscScalar *x, *v;
364d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
365d2840e09SBarry Smith   PetscInt           n, i;
366d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
367bcc973b7SBarry Smith 
368bcc973b7SBarry Smith   PetscFunctionBegin;
3699566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
3709566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3719566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
372bfec09a0SHong Zhang 
373bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
374bfec09a0SHong Zhang     idx    = a->j + a->i[i];
375bfec09a0SHong Zhang     v      = a->a + a->i[i];
376bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
377bcc973b7SBarry Smith     alpha1 = x[3 * i];
378bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
379bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
380bcc973b7SBarry Smith     while (n-- > 0) {
381bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
382bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
383bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
3849371c9d4SSatish Balay       idx++;
3859371c9d4SSatish Balay       v++;
386bcc973b7SBarry Smith     }
387bcc973b7SBarry Smith   }
3889566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
3899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
391bcc973b7SBarry Smith   PetscFunctionReturn(0);
392bcc973b7SBarry Smith }
393bcc973b7SBarry Smith 
394*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
395*d71ae5a4SJacob Faibussowitsch {
3964cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
397bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
398d2840e09SBarry Smith   const PetscScalar *x, *v;
399d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
400b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
401d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
402bcc973b7SBarry Smith 
403bcc973b7SBarry Smith   PetscFunctionBegin;
4049566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4059566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4069566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
407bcc973b7SBarry Smith   idx = a->j;
408bcc973b7SBarry Smith   v   = a->a;
409bcc973b7SBarry Smith   ii  = a->i;
410bcc973b7SBarry Smith 
411bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
412bcc973b7SBarry Smith     jrow = ii[i];
413bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
414bcc973b7SBarry Smith     sum1 = 0.0;
415bcc973b7SBarry Smith     sum2 = 0.0;
416bcc973b7SBarry Smith     sum3 = 0.0;
417bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
418bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
419bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
420bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
421bcc973b7SBarry Smith       jrow++;
422bcc973b7SBarry Smith     }
423bcc973b7SBarry Smith     y[3 * i] += sum1;
424bcc973b7SBarry Smith     y[3 * i + 1] += sum2;
425bcc973b7SBarry Smith     y[3 * i + 2] += sum3;
426bcc973b7SBarry Smith   }
427bcc973b7SBarry Smith 
4289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
431bcc973b7SBarry Smith   PetscFunctionReturn(0);
432bcc973b7SBarry Smith }
433*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
434*d71ae5a4SJacob Faibussowitsch {
4354cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
436bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
437d2840e09SBarry Smith   const PetscScalar *x, *v;
438d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
439d2840e09SBarry Smith   PetscInt           n, i;
440d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
441bcc973b7SBarry Smith 
442bcc973b7SBarry Smith   PetscFunctionBegin;
4439566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4459566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
446bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
447bfec09a0SHong Zhang     idx    = a->j + a->i[i];
448bfec09a0SHong Zhang     v      = a->a + a->i[i];
449bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
450bcc973b7SBarry Smith     alpha1 = x[3 * i];
451bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
452bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
453bcc973b7SBarry Smith     while (n-- > 0) {
454bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
455bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
456bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
4579371c9d4SSatish Balay       idx++;
4589371c9d4SSatish Balay       v++;
459bcc973b7SBarry Smith     }
460bcc973b7SBarry Smith   }
4619566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
464bcc973b7SBarry Smith   PetscFunctionReturn(0);
465bcc973b7SBarry Smith }
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
468*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
469*d71ae5a4SJacob Faibussowitsch {
4704cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
471bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
472d2840e09SBarry Smith   const PetscScalar *x, *v;
473d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
474d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
475d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
476bcc973b7SBarry Smith 
477bcc973b7SBarry Smith   PetscFunctionBegin;
4789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
480bcc973b7SBarry Smith   idx = a->j;
481bcc973b7SBarry Smith   v   = a->a;
482bcc973b7SBarry Smith   ii  = a->i;
483bcc973b7SBarry Smith 
484bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
485bcc973b7SBarry Smith     jrow = ii[i];
486bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
487bcc973b7SBarry Smith     sum1 = 0.0;
488bcc973b7SBarry Smith     sum2 = 0.0;
489bcc973b7SBarry Smith     sum3 = 0.0;
490bcc973b7SBarry Smith     sum4 = 0.0;
491b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
492bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
493bcc973b7SBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
494bcc973b7SBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
495bcc973b7SBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
496bcc973b7SBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
497bcc973b7SBarry Smith       jrow++;
498bcc973b7SBarry Smith     }
499bcc973b7SBarry Smith     y[4 * i]     = sum1;
500bcc973b7SBarry Smith     y[4 * i + 1] = sum2;
501bcc973b7SBarry Smith     y[4 * i + 2] = sum3;
502bcc973b7SBarry Smith     y[4 * i + 3] = sum4;
503bcc973b7SBarry Smith   }
504bcc973b7SBarry Smith 
5059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
5069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
508bcc973b7SBarry Smith   PetscFunctionReturn(0);
509bcc973b7SBarry Smith }
510bcc973b7SBarry Smith 
511*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
512*d71ae5a4SJacob Faibussowitsch {
5134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
514bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
515d2840e09SBarry Smith   const PetscScalar *x, *v;
516d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
517d2840e09SBarry Smith   PetscInt           n, i;
518d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
519bcc973b7SBarry Smith 
520bcc973b7SBarry Smith   PetscFunctionBegin;
5219566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
5229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
524bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
525bfec09a0SHong Zhang     idx    = a->j + a->i[i];
526bfec09a0SHong Zhang     v      = a->a + a->i[i];
527bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
528bcc973b7SBarry Smith     alpha1 = x[4 * i];
529bcc973b7SBarry Smith     alpha2 = x[4 * i + 1];
530bcc973b7SBarry Smith     alpha3 = x[4 * i + 2];
531bcc973b7SBarry Smith     alpha4 = x[4 * i + 3];
532bcc973b7SBarry Smith     while (n-- > 0) {
533bcc973b7SBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
534bcc973b7SBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
535bcc973b7SBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
536bcc973b7SBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5379371c9d4SSatish Balay       idx++;
5389371c9d4SSatish Balay       v++;
539bcc973b7SBarry Smith     }
540bcc973b7SBarry Smith   }
5419566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
544bcc973b7SBarry Smith   PetscFunctionReturn(0);
545bcc973b7SBarry Smith }
546bcc973b7SBarry Smith 
547*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
548*d71ae5a4SJacob Faibussowitsch {
5494cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
550f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
551d2840e09SBarry Smith   const PetscScalar *x, *v;
552d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
553b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
554d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
555f9fae5adSBarry Smith 
556f9fae5adSBarry Smith   PetscFunctionBegin;
5579566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
560f9fae5adSBarry Smith   idx = a->j;
561f9fae5adSBarry Smith   v   = a->a;
562f9fae5adSBarry Smith   ii  = a->i;
563f9fae5adSBarry Smith 
564f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
565f9fae5adSBarry Smith     jrow = ii[i];
566f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
567f9fae5adSBarry Smith     sum1 = 0.0;
568f9fae5adSBarry Smith     sum2 = 0.0;
569f9fae5adSBarry Smith     sum3 = 0.0;
570f9fae5adSBarry Smith     sum4 = 0.0;
571f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
572f9fae5adSBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
573f9fae5adSBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
574f9fae5adSBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
575f9fae5adSBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
576f9fae5adSBarry Smith       jrow++;
577f9fae5adSBarry Smith     }
578f9fae5adSBarry Smith     y[4 * i] += sum1;
579f9fae5adSBarry Smith     y[4 * i + 1] += sum2;
580f9fae5adSBarry Smith     y[4 * i + 2] += sum3;
581f9fae5adSBarry Smith     y[4 * i + 3] += sum4;
582f9fae5adSBarry Smith   }
583f9fae5adSBarry Smith 
5849566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
587f9fae5adSBarry Smith   PetscFunctionReturn(0);
588bcc973b7SBarry Smith }
589*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
590*d71ae5a4SJacob Faibussowitsch {
5914cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
592f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
593d2840e09SBarry Smith   const PetscScalar *x, *v;
594d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
595d2840e09SBarry Smith   PetscInt           n, i;
596d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
597f9fae5adSBarry Smith 
598f9fae5adSBarry Smith   PetscFunctionBegin;
5999566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
6009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6019566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
602bfec09a0SHong Zhang 
603f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
604bfec09a0SHong Zhang     idx    = a->j + a->i[i];
605bfec09a0SHong Zhang     v      = a->a + a->i[i];
606f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
607f9fae5adSBarry Smith     alpha1 = x[4 * i];
608f9fae5adSBarry Smith     alpha2 = x[4 * i + 1];
609f9fae5adSBarry Smith     alpha3 = x[4 * i + 2];
610f9fae5adSBarry Smith     alpha4 = x[4 * i + 3];
611f9fae5adSBarry Smith     while (n-- > 0) {
612f9fae5adSBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
613f9fae5adSBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
614f9fae5adSBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
615f9fae5adSBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
6169371c9d4SSatish Balay       idx++;
6179371c9d4SSatish Balay       v++;
618f9fae5adSBarry Smith     }
619f9fae5adSBarry Smith   }
6209566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
6219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
623f9fae5adSBarry Smith   PetscFunctionReturn(0);
624f9fae5adSBarry Smith }
625f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6266cd98798SBarry Smith 
627*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
628*d71ae5a4SJacob Faibussowitsch {
6294cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
630f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
631d2840e09SBarry Smith   const PetscScalar *x, *v;
632d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
633d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
634d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
635f9fae5adSBarry Smith 
636f9fae5adSBarry Smith   PetscFunctionBegin;
6379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
639f9fae5adSBarry Smith   idx = a->j;
640f9fae5adSBarry Smith   v   = a->a;
641f9fae5adSBarry Smith   ii  = a->i;
642f9fae5adSBarry Smith 
643f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
644f9fae5adSBarry Smith     jrow = ii[i];
645f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
646f9fae5adSBarry Smith     sum1 = 0.0;
647f9fae5adSBarry Smith     sum2 = 0.0;
648f9fae5adSBarry Smith     sum3 = 0.0;
649f9fae5adSBarry Smith     sum4 = 0.0;
650f9fae5adSBarry Smith     sum5 = 0.0;
65126fbe8dcSKarl Rupp 
652b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
653f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
654f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
655f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
656f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
657f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
658f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
659f9fae5adSBarry Smith       jrow++;
660f9fae5adSBarry Smith     }
661f9fae5adSBarry Smith     y[5 * i]     = sum1;
662f9fae5adSBarry Smith     y[5 * i + 1] = sum2;
663f9fae5adSBarry Smith     y[5 * i + 2] = sum3;
664f9fae5adSBarry Smith     y[5 * i + 3] = sum4;
665f9fae5adSBarry Smith     y[5 * i + 4] = sum5;
666f9fae5adSBarry Smith   }
667f9fae5adSBarry Smith 
6689566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
6699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
671f9fae5adSBarry Smith   PetscFunctionReturn(0);
672f9fae5adSBarry Smith }
673f9fae5adSBarry Smith 
674*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
675*d71ae5a4SJacob Faibussowitsch {
6764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
677f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
678d2840e09SBarry Smith   const PetscScalar *x, *v;
679d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
680d2840e09SBarry Smith   PetscInt           n, i;
681d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
682f9fae5adSBarry Smith 
683f9fae5adSBarry Smith   PetscFunctionBegin;
6849566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
6859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
687bfec09a0SHong Zhang 
688f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
689bfec09a0SHong Zhang     idx    = a->j + a->i[i];
690bfec09a0SHong Zhang     v      = a->a + a->i[i];
691f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
692f9fae5adSBarry Smith     alpha1 = x[5 * i];
693f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
694f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
695f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
696f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
697f9fae5adSBarry Smith     while (n-- > 0) {
698f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
699f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
700f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
701f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
702f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
7039371c9d4SSatish Balay       idx++;
7049371c9d4SSatish Balay       v++;
705f9fae5adSBarry Smith     }
706f9fae5adSBarry Smith   }
7079566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
710f9fae5adSBarry Smith   PetscFunctionReturn(0);
711f9fae5adSBarry Smith }
712f9fae5adSBarry Smith 
713*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
714*d71ae5a4SJacob Faibussowitsch {
7154cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
716f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
717d2840e09SBarry Smith   const PetscScalar *x, *v;
718d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
719b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
720d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
721f9fae5adSBarry Smith 
722f9fae5adSBarry Smith   PetscFunctionBegin;
7239566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7259566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
726f9fae5adSBarry Smith   idx = a->j;
727f9fae5adSBarry Smith   v   = a->a;
728f9fae5adSBarry Smith   ii  = a->i;
729f9fae5adSBarry Smith 
730f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
731f9fae5adSBarry Smith     jrow = ii[i];
732f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
733f9fae5adSBarry Smith     sum1 = 0.0;
734f9fae5adSBarry Smith     sum2 = 0.0;
735f9fae5adSBarry Smith     sum3 = 0.0;
736f9fae5adSBarry Smith     sum4 = 0.0;
737f9fae5adSBarry Smith     sum5 = 0.0;
738f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
739f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
740f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
741f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
742f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
743f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
744f9fae5adSBarry Smith       jrow++;
745f9fae5adSBarry Smith     }
746f9fae5adSBarry Smith     y[5 * i] += sum1;
747f9fae5adSBarry Smith     y[5 * i + 1] += sum2;
748f9fae5adSBarry Smith     y[5 * i + 2] += sum3;
749f9fae5adSBarry Smith     y[5 * i + 3] += sum4;
750f9fae5adSBarry Smith     y[5 * i + 4] += sum5;
751f9fae5adSBarry Smith   }
752f9fae5adSBarry Smith 
7539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
756f9fae5adSBarry Smith   PetscFunctionReturn(0);
757f9fae5adSBarry Smith }
758f9fae5adSBarry Smith 
759*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
760*d71ae5a4SJacob Faibussowitsch {
7614cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
762f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
763d2840e09SBarry Smith   const PetscScalar *x, *v;
764d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
765d2840e09SBarry Smith   PetscInt           n, i;
766d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
767f9fae5adSBarry Smith 
768f9fae5adSBarry Smith   PetscFunctionBegin;
7699566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7709566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7719566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
772bfec09a0SHong Zhang 
773f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
774bfec09a0SHong Zhang     idx    = a->j + a->i[i];
775bfec09a0SHong Zhang     v      = a->a + a->i[i];
776f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
777f9fae5adSBarry Smith     alpha1 = x[5 * i];
778f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
779f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
780f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
781f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
782f9fae5adSBarry Smith     while (n-- > 0) {
783f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
784f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
785f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
786f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
787f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
7889371c9d4SSatish Balay       idx++;
7899371c9d4SSatish Balay       v++;
790f9fae5adSBarry Smith     }
791f9fae5adSBarry Smith   }
7929566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
795f9fae5adSBarry Smith   PetscFunctionReturn(0);
796bcc973b7SBarry Smith }
797bcc973b7SBarry Smith 
7986cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
799*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
800*d71ae5a4SJacob Faibussowitsch {
8016cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8026cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
803d2840e09SBarry Smith   const PetscScalar *x, *v;
804d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
805d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
806d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
8076cd98798SBarry Smith 
8086cd98798SBarry Smith   PetscFunctionBegin;
8099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
8116cd98798SBarry Smith   idx = a->j;
8126cd98798SBarry Smith   v   = a->a;
8136cd98798SBarry Smith   ii  = a->i;
8146cd98798SBarry Smith 
8156cd98798SBarry Smith   for (i = 0; i < m; i++) {
8166cd98798SBarry Smith     jrow = ii[i];
8176cd98798SBarry Smith     n    = ii[i + 1] - jrow;
8186cd98798SBarry Smith     sum1 = 0.0;
8196cd98798SBarry Smith     sum2 = 0.0;
8206cd98798SBarry Smith     sum3 = 0.0;
8216cd98798SBarry Smith     sum4 = 0.0;
8226cd98798SBarry Smith     sum5 = 0.0;
8236cd98798SBarry Smith     sum6 = 0.0;
82426fbe8dcSKarl Rupp 
825b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
8266cd98798SBarry Smith     for (j = 0; j < n; j++) {
8276cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8286cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8296cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8306cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8316cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8326cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8336cd98798SBarry Smith       jrow++;
8346cd98798SBarry Smith     }
8356cd98798SBarry Smith     y[6 * i]     = sum1;
8366cd98798SBarry Smith     y[6 * i + 1] = sum2;
8376cd98798SBarry Smith     y[6 * i + 2] = sum3;
8386cd98798SBarry Smith     y[6 * i + 3] = sum4;
8396cd98798SBarry Smith     y[6 * i + 4] = sum5;
8406cd98798SBarry Smith     y[6 * i + 5] = sum6;
8416cd98798SBarry Smith   }
8426cd98798SBarry Smith 
8439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
8449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8466cd98798SBarry Smith   PetscFunctionReturn(0);
8476cd98798SBarry Smith }
8486cd98798SBarry Smith 
849*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
850*d71ae5a4SJacob Faibussowitsch {
8516cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8526cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
853d2840e09SBarry Smith   const PetscScalar *x, *v;
854d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
855d2840e09SBarry Smith   PetscInt           n, i;
856d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
8576cd98798SBarry Smith 
8586cd98798SBarry Smith   PetscFunctionBegin;
8599566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
8609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8619566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
862bfec09a0SHong Zhang 
8636cd98798SBarry Smith   for (i = 0; i < m; i++) {
864bfec09a0SHong Zhang     idx    = a->j + a->i[i];
865bfec09a0SHong Zhang     v      = a->a + a->i[i];
8666cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
8676cd98798SBarry Smith     alpha1 = x[6 * i];
8686cd98798SBarry Smith     alpha2 = x[6 * i + 1];
8696cd98798SBarry Smith     alpha3 = x[6 * i + 2];
8706cd98798SBarry Smith     alpha4 = x[6 * i + 3];
8716cd98798SBarry Smith     alpha5 = x[6 * i + 4];
8726cd98798SBarry Smith     alpha6 = x[6 * i + 5];
8736cd98798SBarry Smith     while (n-- > 0) {
8746cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
8756cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
8766cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
8776cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
8786cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
8796cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
8809371c9d4SSatish Balay       idx++;
8819371c9d4SSatish Balay       v++;
8826cd98798SBarry Smith     }
8836cd98798SBarry Smith   }
8849566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
8859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8876cd98798SBarry Smith   PetscFunctionReturn(0);
8886cd98798SBarry Smith }
8896cd98798SBarry Smith 
890*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
891*d71ae5a4SJacob Faibussowitsch {
8926cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8936cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
894d2840e09SBarry Smith   const PetscScalar *x, *v;
895d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
896b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
897d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
8986cd98798SBarry Smith 
8996cd98798SBarry Smith   PetscFunctionBegin;
9009566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
9019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
9036cd98798SBarry Smith   idx = a->j;
9046cd98798SBarry Smith   v   = a->a;
9056cd98798SBarry Smith   ii  = a->i;
9066cd98798SBarry Smith 
9076cd98798SBarry Smith   for (i = 0; i < m; i++) {
9086cd98798SBarry Smith     jrow = ii[i];
9096cd98798SBarry Smith     n    = ii[i + 1] - jrow;
9106cd98798SBarry Smith     sum1 = 0.0;
9116cd98798SBarry Smith     sum2 = 0.0;
9126cd98798SBarry Smith     sum3 = 0.0;
9136cd98798SBarry Smith     sum4 = 0.0;
9146cd98798SBarry Smith     sum5 = 0.0;
9156cd98798SBarry Smith     sum6 = 0.0;
9166cd98798SBarry Smith     for (j = 0; j < n; j++) {
9176cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
9186cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
9196cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
9206cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
9216cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
9226cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
9236cd98798SBarry Smith       jrow++;
9246cd98798SBarry Smith     }
9256cd98798SBarry Smith     y[6 * i] += sum1;
9266cd98798SBarry Smith     y[6 * i + 1] += sum2;
9276cd98798SBarry Smith     y[6 * i + 2] += sum3;
9286cd98798SBarry Smith     y[6 * i + 3] += sum4;
9296cd98798SBarry Smith     y[6 * i + 4] += sum5;
9306cd98798SBarry Smith     y[6 * i + 5] += sum6;
9316cd98798SBarry Smith   }
9326cd98798SBarry Smith 
9339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9366cd98798SBarry Smith   PetscFunctionReturn(0);
9376cd98798SBarry Smith }
9386cd98798SBarry Smith 
939*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
940*d71ae5a4SJacob Faibussowitsch {
9416cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
9426cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
943d2840e09SBarry Smith   const PetscScalar *x, *v;
944d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
945d2840e09SBarry Smith   PetscInt           n, i;
946d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
9476cd98798SBarry Smith 
9486cd98798SBarry Smith   PetscFunctionBegin;
9499566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
9509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
952bfec09a0SHong Zhang 
9536cd98798SBarry Smith   for (i = 0; i < m; i++) {
954bfec09a0SHong Zhang     idx    = a->j + a->i[i];
955bfec09a0SHong Zhang     v      = a->a + a->i[i];
9566cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
9576cd98798SBarry Smith     alpha1 = x[6 * i];
9586cd98798SBarry Smith     alpha2 = x[6 * i + 1];
9596cd98798SBarry Smith     alpha3 = x[6 * i + 2];
9606cd98798SBarry Smith     alpha4 = x[6 * i + 3];
9616cd98798SBarry Smith     alpha5 = x[6 * i + 4];
9626cd98798SBarry Smith     alpha6 = x[6 * i + 5];
9636cd98798SBarry Smith     while (n-- > 0) {
9646cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
9656cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
9666cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
9676cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
9686cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
9696cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
9709371c9d4SSatish Balay       idx++;
9719371c9d4SSatish Balay       v++;
9726cd98798SBarry Smith     }
9736cd98798SBarry Smith   }
9749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9776cd98798SBarry Smith   PetscFunctionReturn(0);
9786cd98798SBarry Smith }
9796cd98798SBarry Smith 
98066ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
981*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
982*d71ae5a4SJacob Faibussowitsch {
983ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
984ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
985d2840e09SBarry Smith   const PetscScalar *x, *v;
986d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
987d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
988d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
989ed8eea03SBarry Smith 
990ed8eea03SBarry Smith   PetscFunctionBegin;
9919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
993ed8eea03SBarry Smith   idx = a->j;
994ed8eea03SBarry Smith   v   = a->a;
995ed8eea03SBarry Smith   ii  = a->i;
996ed8eea03SBarry Smith 
997ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
998ed8eea03SBarry Smith     jrow = ii[i];
999ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
1000ed8eea03SBarry Smith     sum1 = 0.0;
1001ed8eea03SBarry Smith     sum2 = 0.0;
1002ed8eea03SBarry Smith     sum3 = 0.0;
1003ed8eea03SBarry Smith     sum4 = 0.0;
1004ed8eea03SBarry Smith     sum5 = 0.0;
1005ed8eea03SBarry Smith     sum6 = 0.0;
1006ed8eea03SBarry Smith     sum7 = 0.0;
100726fbe8dcSKarl Rupp 
1008b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
1009ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
1010ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
1011ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1012ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1013ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1014ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1015ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1016ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1017ed8eea03SBarry Smith       jrow++;
1018ed8eea03SBarry Smith     }
1019ed8eea03SBarry Smith     y[7 * i]     = sum1;
1020ed8eea03SBarry Smith     y[7 * i + 1] = sum2;
1021ed8eea03SBarry Smith     y[7 * i + 2] = sum3;
1022ed8eea03SBarry Smith     y[7 * i + 3] = sum4;
1023ed8eea03SBarry Smith     y[7 * i + 4] = sum5;
1024ed8eea03SBarry Smith     y[7 * i + 5] = sum6;
1025ed8eea03SBarry Smith     y[7 * i + 6] = sum7;
1026ed8eea03SBarry Smith   }
1027ed8eea03SBarry Smith 
10289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
10299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1031ed8eea03SBarry Smith   PetscFunctionReturn(0);
1032ed8eea03SBarry Smith }
1033ed8eea03SBarry Smith 
1034*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
1035*d71ae5a4SJacob Faibussowitsch {
1036ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1037ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1038d2840e09SBarry Smith   const PetscScalar *x, *v;
1039d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1040d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1041d2840e09SBarry Smith   PetscInt           n, i;
1042ed8eea03SBarry Smith 
1043ed8eea03SBarry Smith   PetscFunctionBegin;
10449566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
10459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1047ed8eea03SBarry Smith 
1048ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1049ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1050ed8eea03SBarry Smith     v      = a->a + a->i[i];
1051ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1052ed8eea03SBarry Smith     alpha1 = x[7 * i];
1053ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1054ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1055ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1056ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1057ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1058ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1059ed8eea03SBarry Smith     while (n-- > 0) {
1060ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1061ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1062ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1063ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1064ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1065ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1066ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
10679371c9d4SSatish Balay       idx++;
10689371c9d4SSatish Balay       v++;
1069ed8eea03SBarry Smith     }
1070ed8eea03SBarry Smith   }
10719566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1074ed8eea03SBarry Smith   PetscFunctionReturn(0);
1075ed8eea03SBarry Smith }
1076ed8eea03SBarry Smith 
1077*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1078*d71ae5a4SJacob Faibussowitsch {
1079ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1080ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1081d2840e09SBarry Smith   const PetscScalar *x, *v;
1082d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1083d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1084ed8eea03SBarry Smith   PetscInt           n, i, jrow, j;
1085ed8eea03SBarry Smith 
1086ed8eea03SBarry Smith   PetscFunctionBegin;
10879566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
10889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1090ed8eea03SBarry Smith   idx = a->j;
1091ed8eea03SBarry Smith   v   = a->a;
1092ed8eea03SBarry Smith   ii  = a->i;
1093ed8eea03SBarry Smith 
1094ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1095ed8eea03SBarry Smith     jrow = ii[i];
1096ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
1097ed8eea03SBarry Smith     sum1 = 0.0;
1098ed8eea03SBarry Smith     sum2 = 0.0;
1099ed8eea03SBarry Smith     sum3 = 0.0;
1100ed8eea03SBarry Smith     sum4 = 0.0;
1101ed8eea03SBarry Smith     sum5 = 0.0;
1102ed8eea03SBarry Smith     sum6 = 0.0;
1103ed8eea03SBarry Smith     sum7 = 0.0;
1104ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
1105ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
1106ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1107ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1108ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1109ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1110ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1111ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1112ed8eea03SBarry Smith       jrow++;
1113ed8eea03SBarry Smith     }
1114ed8eea03SBarry Smith     y[7 * i] += sum1;
1115ed8eea03SBarry Smith     y[7 * i + 1] += sum2;
1116ed8eea03SBarry Smith     y[7 * i + 2] += sum3;
1117ed8eea03SBarry Smith     y[7 * i + 3] += sum4;
1118ed8eea03SBarry Smith     y[7 * i + 4] += sum5;
1119ed8eea03SBarry Smith     y[7 * i + 5] += sum6;
1120ed8eea03SBarry Smith     y[7 * i + 6] += sum7;
1121ed8eea03SBarry Smith   }
1122ed8eea03SBarry Smith 
11239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1126ed8eea03SBarry Smith   PetscFunctionReturn(0);
1127ed8eea03SBarry Smith }
1128ed8eea03SBarry Smith 
1129*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1130*d71ae5a4SJacob Faibussowitsch {
1131ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1132ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1133d2840e09SBarry Smith   const PetscScalar *x, *v;
1134d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1135d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1136d2840e09SBarry Smith   PetscInt           n, i;
1137ed8eea03SBarry Smith 
1138ed8eea03SBarry Smith   PetscFunctionBegin;
11399566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
11409566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11419566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1142ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1143ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1144ed8eea03SBarry Smith     v      = a->a + a->i[i];
1145ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1146ed8eea03SBarry Smith     alpha1 = x[7 * i];
1147ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1148ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1149ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1150ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1151ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1152ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1153ed8eea03SBarry Smith     while (n-- > 0) {
1154ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1155ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1156ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1157ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1158ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1159ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1160ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
11619371c9d4SSatish Balay       idx++;
11629371c9d4SSatish Balay       v++;
1163ed8eea03SBarry Smith     }
1164ed8eea03SBarry Smith   }
11659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1168ed8eea03SBarry Smith   PetscFunctionReturn(0);
1169ed8eea03SBarry Smith }
1170ed8eea03SBarry Smith 
1171*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1172*d71ae5a4SJacob Faibussowitsch {
117366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
117466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1175d2840e09SBarry Smith   const PetscScalar *x, *v;
1176d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1177d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1178d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
117966ed3db0SBarry Smith 
118066ed3db0SBarry Smith   PetscFunctionBegin;
11819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11829566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
118366ed3db0SBarry Smith   idx = a->j;
118466ed3db0SBarry Smith   v   = a->a;
118566ed3db0SBarry Smith   ii  = a->i;
118666ed3db0SBarry Smith 
118766ed3db0SBarry Smith   for (i = 0; i < m; i++) {
118866ed3db0SBarry Smith     jrow = ii[i];
118966ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
119066ed3db0SBarry Smith     sum1 = 0.0;
119166ed3db0SBarry Smith     sum2 = 0.0;
119266ed3db0SBarry Smith     sum3 = 0.0;
119366ed3db0SBarry Smith     sum4 = 0.0;
119466ed3db0SBarry Smith     sum5 = 0.0;
119566ed3db0SBarry Smith     sum6 = 0.0;
119666ed3db0SBarry Smith     sum7 = 0.0;
119766ed3db0SBarry Smith     sum8 = 0.0;
119826fbe8dcSKarl Rupp 
1199b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
120066ed3db0SBarry Smith     for (j = 0; j < n; j++) {
120166ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
120266ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
120366ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
120466ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
120566ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
120666ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
120766ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
120866ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
120966ed3db0SBarry Smith       jrow++;
121066ed3db0SBarry Smith     }
121166ed3db0SBarry Smith     y[8 * i]     = sum1;
121266ed3db0SBarry Smith     y[8 * i + 1] = sum2;
121366ed3db0SBarry Smith     y[8 * i + 2] = sum3;
121466ed3db0SBarry Smith     y[8 * i + 3] = sum4;
121566ed3db0SBarry Smith     y[8 * i + 4] = sum5;
121666ed3db0SBarry Smith     y[8 * i + 5] = sum6;
121766ed3db0SBarry Smith     y[8 * i + 6] = sum7;
121866ed3db0SBarry Smith     y[8 * i + 7] = sum8;
121966ed3db0SBarry Smith   }
122066ed3db0SBarry Smith 
12219566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
12229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
122466ed3db0SBarry Smith   PetscFunctionReturn(0);
122566ed3db0SBarry Smith }
122666ed3db0SBarry Smith 
1227*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1228*d71ae5a4SJacob Faibussowitsch {
122966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
123066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1231d2840e09SBarry Smith   const PetscScalar *x, *v;
1232d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1233d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1234d2840e09SBarry Smith   PetscInt           n, i;
123566ed3db0SBarry Smith 
123666ed3db0SBarry Smith   PetscFunctionBegin;
12379566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
12389566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12399566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1240bfec09a0SHong Zhang 
124166ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1242bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1243bfec09a0SHong Zhang     v      = a->a + a->i[i];
124466ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
124566ed3db0SBarry Smith     alpha1 = x[8 * i];
124666ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
124766ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
124866ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
124966ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
125066ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
125166ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
125266ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
125366ed3db0SBarry Smith     while (n-- > 0) {
125466ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
125566ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
125666ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
125766ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
125866ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
125966ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
126066ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
126166ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
12629371c9d4SSatish Balay       idx++;
12639371c9d4SSatish Balay       v++;
126466ed3db0SBarry Smith     }
126566ed3db0SBarry Smith   }
12669566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
126966ed3db0SBarry Smith   PetscFunctionReturn(0);
127066ed3db0SBarry Smith }
127166ed3db0SBarry Smith 
1272*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1273*d71ae5a4SJacob Faibussowitsch {
127466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
127566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1276d2840e09SBarry Smith   const PetscScalar *x, *v;
1277d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1278d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1279b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
128066ed3db0SBarry Smith 
128166ed3db0SBarry Smith   PetscFunctionBegin;
12829566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
12839566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12849566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
128566ed3db0SBarry Smith   idx = a->j;
128666ed3db0SBarry Smith   v   = a->a;
128766ed3db0SBarry Smith   ii  = a->i;
128866ed3db0SBarry Smith 
128966ed3db0SBarry Smith   for (i = 0; i < m; i++) {
129066ed3db0SBarry Smith     jrow = ii[i];
129166ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
129266ed3db0SBarry Smith     sum1 = 0.0;
129366ed3db0SBarry Smith     sum2 = 0.0;
129466ed3db0SBarry Smith     sum3 = 0.0;
129566ed3db0SBarry Smith     sum4 = 0.0;
129666ed3db0SBarry Smith     sum5 = 0.0;
129766ed3db0SBarry Smith     sum6 = 0.0;
129866ed3db0SBarry Smith     sum7 = 0.0;
129966ed3db0SBarry Smith     sum8 = 0.0;
130066ed3db0SBarry Smith     for (j = 0; j < n; j++) {
130166ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
130266ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
130366ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
130466ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
130566ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
130666ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
130766ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
130866ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
130966ed3db0SBarry Smith       jrow++;
131066ed3db0SBarry Smith     }
131166ed3db0SBarry Smith     y[8 * i] += sum1;
131266ed3db0SBarry Smith     y[8 * i + 1] += sum2;
131366ed3db0SBarry Smith     y[8 * i + 2] += sum3;
131466ed3db0SBarry Smith     y[8 * i + 3] += sum4;
131566ed3db0SBarry Smith     y[8 * i + 4] += sum5;
131666ed3db0SBarry Smith     y[8 * i + 5] += sum6;
131766ed3db0SBarry Smith     y[8 * i + 6] += sum7;
131866ed3db0SBarry Smith     y[8 * i + 7] += sum8;
131966ed3db0SBarry Smith   }
132066ed3db0SBarry Smith 
13219566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
132466ed3db0SBarry Smith   PetscFunctionReturn(0);
132566ed3db0SBarry Smith }
132666ed3db0SBarry Smith 
1327*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1328*d71ae5a4SJacob Faibussowitsch {
132966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
133066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1331d2840e09SBarry Smith   const PetscScalar *x, *v;
1332d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1333d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1334d2840e09SBarry Smith   PetscInt           n, i;
133566ed3db0SBarry Smith 
133666ed3db0SBarry Smith   PetscFunctionBegin;
13379566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
13389566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13399566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
134066ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1341bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1342bfec09a0SHong Zhang     v      = a->a + a->i[i];
134366ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
134466ed3db0SBarry Smith     alpha1 = x[8 * i];
134566ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
134666ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
134766ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
134866ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
134966ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
135066ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
135166ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
135266ed3db0SBarry Smith     while (n-- > 0) {
135366ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
135466ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
135566ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
135666ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
135766ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
135866ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
135966ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
136066ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
13619371c9d4SSatish Balay       idx++;
13629371c9d4SSatish Balay       v++;
136366ed3db0SBarry Smith     }
136466ed3db0SBarry Smith   }
13659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
13682f7816d4SBarry Smith   PetscFunctionReturn(0);
13692f7816d4SBarry Smith }
13702f7816d4SBarry Smith 
13712b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1372*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1373*d71ae5a4SJacob Faibussowitsch {
13742b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13752b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1376d2840e09SBarry Smith   const PetscScalar *x, *v;
1377d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1378d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1379d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
13802b692628SMatthew Knepley 
13812b692628SMatthew Knepley   PetscFunctionBegin;
13829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13839566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
13842b692628SMatthew Knepley   idx = a->j;
13852b692628SMatthew Knepley   v   = a->a;
13862b692628SMatthew Knepley   ii  = a->i;
13872b692628SMatthew Knepley 
13882b692628SMatthew Knepley   for (i = 0; i < m; i++) {
13892b692628SMatthew Knepley     jrow = ii[i];
13902b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
13912b692628SMatthew Knepley     sum1 = 0.0;
13922b692628SMatthew Knepley     sum2 = 0.0;
13932b692628SMatthew Knepley     sum3 = 0.0;
13942b692628SMatthew Knepley     sum4 = 0.0;
13952b692628SMatthew Knepley     sum5 = 0.0;
13962b692628SMatthew Knepley     sum6 = 0.0;
13972b692628SMatthew Knepley     sum7 = 0.0;
13982b692628SMatthew Knepley     sum8 = 0.0;
13992b692628SMatthew Knepley     sum9 = 0.0;
140026fbe8dcSKarl Rupp 
1401b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
14022b692628SMatthew Knepley     for (j = 0; j < n; j++) {
14032b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
14042b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
14052b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
14062b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
14072b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
14082b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
14092b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
14102b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
14112b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
14122b692628SMatthew Knepley       jrow++;
14132b692628SMatthew Knepley     }
14142b692628SMatthew Knepley     y[9 * i]     = sum1;
14152b692628SMatthew Knepley     y[9 * i + 1] = sum2;
14162b692628SMatthew Knepley     y[9 * i + 2] = sum3;
14172b692628SMatthew Knepley     y[9 * i + 3] = sum4;
14182b692628SMatthew Knepley     y[9 * i + 4] = sum5;
14192b692628SMatthew Knepley     y[9 * i + 5] = sum6;
14202b692628SMatthew Knepley     y[9 * i + 6] = sum7;
14212b692628SMatthew Knepley     y[9 * i + 7] = sum8;
14222b692628SMatthew Knepley     y[9 * i + 8] = sum9;
14232b692628SMatthew Knepley   }
14242b692628SMatthew Knepley 
14259566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
14269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14282b692628SMatthew Knepley   PetscFunctionReturn(0);
14292b692628SMatthew Knepley }
14302b692628SMatthew Knepley 
1431b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1432b9cda22cSBarry Smith 
1433*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1434*d71ae5a4SJacob Faibussowitsch {
14352b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14362b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1437d2840e09SBarry Smith   const PetscScalar *x, *v;
1438d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1439d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1440d2840e09SBarry Smith   PetscInt           n, i;
14412b692628SMatthew Knepley 
14422b692628SMatthew Knepley   PetscFunctionBegin;
14439566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
14449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14459566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
14462b692628SMatthew Knepley 
14472b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14482b692628SMatthew Knepley     idx    = a->j + a->i[i];
14492b692628SMatthew Knepley     v      = a->a + a->i[i];
14502b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
14512b692628SMatthew Knepley     alpha1 = x[9 * i];
14522b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
14532b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
14542b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
14552b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
14562b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
14572b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
14582b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
14592f6bd0c6SMatthew Knepley     alpha9 = x[9 * i + 8];
14602b692628SMatthew Knepley     while (n-- > 0) {
14612b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
14622b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
14632b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
14642b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
14652b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
14662b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
14672b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
14682b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
14692b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
14709371c9d4SSatish Balay       idx++;
14719371c9d4SSatish Balay       v++;
14722b692628SMatthew Knepley     }
14732b692628SMatthew Knepley   }
14749566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14772b692628SMatthew Knepley   PetscFunctionReturn(0);
14782b692628SMatthew Knepley }
14792b692628SMatthew Knepley 
1480*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1481*d71ae5a4SJacob Faibussowitsch {
14822b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14832b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1484d2840e09SBarry Smith   const PetscScalar *x, *v;
1485d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1486d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1487b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
14882b692628SMatthew Knepley 
14892b692628SMatthew Knepley   PetscFunctionBegin;
14909566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
14919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
14932b692628SMatthew Knepley   idx = a->j;
14942b692628SMatthew Knepley   v   = a->a;
14952b692628SMatthew Knepley   ii  = a->i;
14962b692628SMatthew Knepley 
14972b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14982b692628SMatthew Knepley     jrow = ii[i];
14992b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
15002b692628SMatthew Knepley     sum1 = 0.0;
15012b692628SMatthew Knepley     sum2 = 0.0;
15022b692628SMatthew Knepley     sum3 = 0.0;
15032b692628SMatthew Knepley     sum4 = 0.0;
15042b692628SMatthew Knepley     sum5 = 0.0;
15052b692628SMatthew Knepley     sum6 = 0.0;
15062b692628SMatthew Knepley     sum7 = 0.0;
15072b692628SMatthew Knepley     sum8 = 0.0;
15082b692628SMatthew Knepley     sum9 = 0.0;
15092b692628SMatthew Knepley     for (j = 0; j < n; j++) {
15102b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
15112b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
15122b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
15132b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
15142b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
15152b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
15162b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
15172b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
15182b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
15192b692628SMatthew Knepley       jrow++;
15202b692628SMatthew Knepley     }
15212b692628SMatthew Knepley     y[9 * i] += sum1;
15222b692628SMatthew Knepley     y[9 * i + 1] += sum2;
15232b692628SMatthew Knepley     y[9 * i + 2] += sum3;
15242b692628SMatthew Knepley     y[9 * i + 3] += sum4;
15252b692628SMatthew Knepley     y[9 * i + 4] += sum5;
15262b692628SMatthew Knepley     y[9 * i + 5] += sum6;
15272b692628SMatthew Knepley     y[9 * i + 6] += sum7;
15282b692628SMatthew Knepley     y[9 * i + 7] += sum8;
15292b692628SMatthew Knepley     y[9 * i + 8] += sum9;
15302b692628SMatthew Knepley   }
15312b692628SMatthew Knepley 
15329566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15339566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15352b692628SMatthew Knepley   PetscFunctionReturn(0);
15362b692628SMatthew Knepley }
15372b692628SMatthew Knepley 
1538*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1539*d71ae5a4SJacob Faibussowitsch {
15402b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
15412b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1542d2840e09SBarry Smith   const PetscScalar *x, *v;
1543d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1544d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1545d2840e09SBarry Smith   PetscInt           n, i;
15462b692628SMatthew Knepley 
15472b692628SMatthew Knepley   PetscFunctionBegin;
15489566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
15499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
15512b692628SMatthew Knepley   for (i = 0; i < m; i++) {
15522b692628SMatthew Knepley     idx    = a->j + a->i[i];
15532b692628SMatthew Knepley     v      = a->a + a->i[i];
15542b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
15552b692628SMatthew Knepley     alpha1 = x[9 * i];
15562b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
15572b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
15582b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
15592b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
15602b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
15612b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
15622b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
15632b692628SMatthew Knepley     alpha9 = x[9 * i + 8];
15642b692628SMatthew Knepley     while (n-- > 0) {
15652b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
15662b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
15672b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
15682b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
15692b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
15702b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
15712b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
15722b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
15732b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
15749371c9d4SSatish Balay       idx++;
15759371c9d4SSatish Balay       v++;
15762b692628SMatthew Knepley     }
15772b692628SMatthew Knepley   }
15789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15812b692628SMatthew Knepley   PetscFunctionReturn(0);
15822b692628SMatthew Knepley }
1583*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1584*d71ae5a4SJacob Faibussowitsch {
1585b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1586b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1587d2840e09SBarry Smith   const PetscScalar *x, *v;
1588d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1589d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1590d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1591b9cda22cSBarry Smith 
1592b9cda22cSBarry Smith   PetscFunctionBegin;
15939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15949566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1595b9cda22cSBarry Smith   idx = a->j;
1596b9cda22cSBarry Smith   v   = a->a;
1597b9cda22cSBarry Smith   ii  = a->i;
1598b9cda22cSBarry Smith 
1599b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1600b9cda22cSBarry Smith     jrow  = ii[i];
1601b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1602b9cda22cSBarry Smith     sum1  = 0.0;
1603b9cda22cSBarry Smith     sum2  = 0.0;
1604b9cda22cSBarry Smith     sum3  = 0.0;
1605b9cda22cSBarry Smith     sum4  = 0.0;
1606b9cda22cSBarry Smith     sum5  = 0.0;
1607b9cda22cSBarry Smith     sum6  = 0.0;
1608b9cda22cSBarry Smith     sum7  = 0.0;
1609b9cda22cSBarry Smith     sum8  = 0.0;
1610b9cda22cSBarry Smith     sum9  = 0.0;
1611b9cda22cSBarry Smith     sum10 = 0.0;
161226fbe8dcSKarl Rupp 
1613b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
1614b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1615b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1616b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1617b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1618b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1619b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1620b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1621b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1622b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1623b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1624b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1625b9cda22cSBarry Smith       jrow++;
1626b9cda22cSBarry Smith     }
1627b9cda22cSBarry Smith     y[10 * i]     = sum1;
1628b9cda22cSBarry Smith     y[10 * i + 1] = sum2;
1629b9cda22cSBarry Smith     y[10 * i + 2] = sum3;
1630b9cda22cSBarry Smith     y[10 * i + 3] = sum4;
1631b9cda22cSBarry Smith     y[10 * i + 4] = sum5;
1632b9cda22cSBarry Smith     y[10 * i + 5] = sum6;
1633b9cda22cSBarry Smith     y[10 * i + 6] = sum7;
1634b9cda22cSBarry Smith     y[10 * i + 7] = sum8;
1635b9cda22cSBarry Smith     y[10 * i + 8] = sum9;
1636b9cda22cSBarry Smith     y[10 * i + 9] = sum10;
1637b9cda22cSBarry Smith   }
1638b9cda22cSBarry Smith 
16399566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
16409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1642b9cda22cSBarry Smith   PetscFunctionReturn(0);
1643b9cda22cSBarry Smith }
1644b9cda22cSBarry Smith 
1645*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1646*d71ae5a4SJacob Faibussowitsch {
1647b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1648b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1649d2840e09SBarry Smith   const PetscScalar *x, *v;
1650d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1651d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1652b9cda22cSBarry Smith   PetscInt           n, i, jrow, j;
1653b9cda22cSBarry Smith 
1654b9cda22cSBarry Smith   PetscFunctionBegin;
16559566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
16569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1658b9cda22cSBarry Smith   idx = a->j;
1659b9cda22cSBarry Smith   v   = a->a;
1660b9cda22cSBarry Smith   ii  = a->i;
1661b9cda22cSBarry Smith 
1662b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1663b9cda22cSBarry Smith     jrow  = ii[i];
1664b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1665b9cda22cSBarry Smith     sum1  = 0.0;
1666b9cda22cSBarry Smith     sum2  = 0.0;
1667b9cda22cSBarry Smith     sum3  = 0.0;
1668b9cda22cSBarry Smith     sum4  = 0.0;
1669b9cda22cSBarry Smith     sum5  = 0.0;
1670b9cda22cSBarry Smith     sum6  = 0.0;
1671b9cda22cSBarry Smith     sum7  = 0.0;
1672b9cda22cSBarry Smith     sum8  = 0.0;
1673b9cda22cSBarry Smith     sum9  = 0.0;
1674b9cda22cSBarry Smith     sum10 = 0.0;
1675b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1676b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1677b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1678b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1679b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1680b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1681b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1682b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1683b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1684b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1685b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1686b9cda22cSBarry Smith       jrow++;
1687b9cda22cSBarry Smith     }
1688b9cda22cSBarry Smith     y[10 * i] += sum1;
1689b9cda22cSBarry Smith     y[10 * i + 1] += sum2;
1690b9cda22cSBarry Smith     y[10 * i + 2] += sum3;
1691b9cda22cSBarry Smith     y[10 * i + 3] += sum4;
1692b9cda22cSBarry Smith     y[10 * i + 4] += sum5;
1693b9cda22cSBarry Smith     y[10 * i + 5] += sum6;
1694b9cda22cSBarry Smith     y[10 * i + 6] += sum7;
1695b9cda22cSBarry Smith     y[10 * i + 7] += sum8;
1696b9cda22cSBarry Smith     y[10 * i + 8] += sum9;
1697b9cda22cSBarry Smith     y[10 * i + 9] += sum10;
1698b9cda22cSBarry Smith   }
1699b9cda22cSBarry Smith 
17009566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1703b9cda22cSBarry Smith   PetscFunctionReturn(0);
1704b9cda22cSBarry Smith }
1705b9cda22cSBarry Smith 
1706*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1707*d71ae5a4SJacob Faibussowitsch {
1708b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1709b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1710d2840e09SBarry Smith   const PetscScalar *x, *v;
1711d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1712d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1713d2840e09SBarry Smith   PetscInt           n, i;
1714b9cda22cSBarry Smith 
1715b9cda22cSBarry Smith   PetscFunctionBegin;
17169566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
17179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1719b9cda22cSBarry Smith 
1720b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1721b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1722b9cda22cSBarry Smith     v       = a->a + a->i[i];
1723b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1724e29fdc22SBarry Smith     alpha1  = x[10 * i];
1725e29fdc22SBarry Smith     alpha2  = x[10 * i + 1];
1726e29fdc22SBarry Smith     alpha3  = x[10 * i + 2];
1727e29fdc22SBarry Smith     alpha4  = x[10 * i + 3];
1728e29fdc22SBarry Smith     alpha5  = x[10 * i + 4];
1729e29fdc22SBarry Smith     alpha6  = x[10 * i + 5];
1730e29fdc22SBarry Smith     alpha7  = x[10 * i + 6];
1731e29fdc22SBarry Smith     alpha8  = x[10 * i + 7];
1732e29fdc22SBarry Smith     alpha9  = x[10 * i + 8];
1733b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1734b9cda22cSBarry Smith     while (n-- > 0) {
1735e29fdc22SBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1736e29fdc22SBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1737e29fdc22SBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1738e29fdc22SBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1739e29fdc22SBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1740e29fdc22SBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1741e29fdc22SBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1742e29fdc22SBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1743e29fdc22SBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1744b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17459371c9d4SSatish Balay       idx++;
17469371c9d4SSatish Balay       v++;
1747b9cda22cSBarry Smith     }
1748b9cda22cSBarry Smith   }
17499566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1752b9cda22cSBarry Smith   PetscFunctionReturn(0);
1753b9cda22cSBarry Smith }
1754b9cda22cSBarry Smith 
1755*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1756*d71ae5a4SJacob Faibussowitsch {
1757b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1758b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1759d2840e09SBarry Smith   const PetscScalar *x, *v;
1760d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1761d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1762d2840e09SBarry Smith   PetscInt           n, i;
1763b9cda22cSBarry Smith 
1764b9cda22cSBarry Smith   PetscFunctionBegin;
17659566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
17669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1768b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1769b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1770b9cda22cSBarry Smith     v       = a->a + a->i[i];
1771b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1772b9cda22cSBarry Smith     alpha1  = x[10 * i];
1773b9cda22cSBarry Smith     alpha2  = x[10 * i + 1];
1774b9cda22cSBarry Smith     alpha3  = x[10 * i + 2];
1775b9cda22cSBarry Smith     alpha4  = x[10 * i + 3];
1776b9cda22cSBarry Smith     alpha5  = x[10 * i + 4];
1777b9cda22cSBarry Smith     alpha6  = x[10 * i + 5];
1778b9cda22cSBarry Smith     alpha7  = x[10 * i + 6];
1779b9cda22cSBarry Smith     alpha8  = x[10 * i + 7];
1780b9cda22cSBarry Smith     alpha9  = x[10 * i + 8];
1781b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1782b9cda22cSBarry Smith     while (n-- > 0) {
1783b9cda22cSBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1784b9cda22cSBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1785b9cda22cSBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1786b9cda22cSBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1787b9cda22cSBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1788b9cda22cSBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1789b9cda22cSBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1790b9cda22cSBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1791b9cda22cSBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1792b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17939371c9d4SSatish Balay       idx++;
17949371c9d4SSatish Balay       v++;
1795b9cda22cSBarry Smith     }
1796b9cda22cSBarry Smith   }
17979566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1800b9cda22cSBarry Smith   PetscFunctionReturn(0);
1801b9cda22cSBarry Smith }
1802b9cda22cSBarry Smith 
18032f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1804*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1805*d71ae5a4SJacob Faibussowitsch {
1806dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1807dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1808d2840e09SBarry Smith   const PetscScalar *x, *v;
1809d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1810d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1811d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1812dbdd7285SBarry Smith 
1813dbdd7285SBarry Smith   PetscFunctionBegin;
18149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1816dbdd7285SBarry Smith   idx = a->j;
1817dbdd7285SBarry Smith   v   = a->a;
1818dbdd7285SBarry Smith   ii  = a->i;
1819dbdd7285SBarry Smith 
1820dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1821dbdd7285SBarry Smith     jrow  = ii[i];
1822dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1823dbdd7285SBarry Smith     sum1  = 0.0;
1824dbdd7285SBarry Smith     sum2  = 0.0;
1825dbdd7285SBarry Smith     sum3  = 0.0;
1826dbdd7285SBarry Smith     sum4  = 0.0;
1827dbdd7285SBarry Smith     sum5  = 0.0;
1828dbdd7285SBarry Smith     sum6  = 0.0;
1829dbdd7285SBarry Smith     sum7  = 0.0;
1830dbdd7285SBarry Smith     sum8  = 0.0;
1831dbdd7285SBarry Smith     sum9  = 0.0;
1832dbdd7285SBarry Smith     sum10 = 0.0;
1833dbdd7285SBarry Smith     sum11 = 0.0;
183426fbe8dcSKarl Rupp 
1835dbdd7285SBarry Smith     nonzerorow += (n > 0);
1836dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1837dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1838dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1839dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1840dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1841dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1842dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1843dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1844dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1845dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1846dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1847dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1848dbdd7285SBarry Smith       jrow++;
1849dbdd7285SBarry Smith     }
1850dbdd7285SBarry Smith     y[11 * i]      = sum1;
1851dbdd7285SBarry Smith     y[11 * i + 1]  = sum2;
1852dbdd7285SBarry Smith     y[11 * i + 2]  = sum3;
1853dbdd7285SBarry Smith     y[11 * i + 3]  = sum4;
1854dbdd7285SBarry Smith     y[11 * i + 4]  = sum5;
1855dbdd7285SBarry Smith     y[11 * i + 5]  = sum6;
1856dbdd7285SBarry Smith     y[11 * i + 6]  = sum7;
1857dbdd7285SBarry Smith     y[11 * i + 7]  = sum8;
1858dbdd7285SBarry Smith     y[11 * i + 8]  = sum9;
1859dbdd7285SBarry Smith     y[11 * i + 9]  = sum10;
1860dbdd7285SBarry Smith     y[11 * i + 10] = sum11;
1861dbdd7285SBarry Smith   }
1862dbdd7285SBarry Smith 
18639566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
18649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1866dbdd7285SBarry Smith   PetscFunctionReturn(0);
1867dbdd7285SBarry Smith }
1868dbdd7285SBarry Smith 
1869*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1870*d71ae5a4SJacob Faibussowitsch {
1871dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1872dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1873d2840e09SBarry Smith   const PetscScalar *x, *v;
1874d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1875d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1876dbdd7285SBarry Smith   PetscInt           n, i, jrow, j;
1877dbdd7285SBarry Smith 
1878dbdd7285SBarry Smith   PetscFunctionBegin;
18799566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
18809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1882dbdd7285SBarry Smith   idx = a->j;
1883dbdd7285SBarry Smith   v   = a->a;
1884dbdd7285SBarry Smith   ii  = a->i;
1885dbdd7285SBarry Smith 
1886dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1887dbdd7285SBarry Smith     jrow  = ii[i];
1888dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1889dbdd7285SBarry Smith     sum1  = 0.0;
1890dbdd7285SBarry Smith     sum2  = 0.0;
1891dbdd7285SBarry Smith     sum3  = 0.0;
1892dbdd7285SBarry Smith     sum4  = 0.0;
1893dbdd7285SBarry Smith     sum5  = 0.0;
1894dbdd7285SBarry Smith     sum6  = 0.0;
1895dbdd7285SBarry Smith     sum7  = 0.0;
1896dbdd7285SBarry Smith     sum8  = 0.0;
1897dbdd7285SBarry Smith     sum9  = 0.0;
1898dbdd7285SBarry Smith     sum10 = 0.0;
1899dbdd7285SBarry Smith     sum11 = 0.0;
1900dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1901dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1902dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1903dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1904dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1905dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1906dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1907dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1908dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1909dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1910dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1911dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1912dbdd7285SBarry Smith       jrow++;
1913dbdd7285SBarry Smith     }
1914dbdd7285SBarry Smith     y[11 * i] += sum1;
1915dbdd7285SBarry Smith     y[11 * i + 1] += sum2;
1916dbdd7285SBarry Smith     y[11 * i + 2] += sum3;
1917dbdd7285SBarry Smith     y[11 * i + 3] += sum4;
1918dbdd7285SBarry Smith     y[11 * i + 4] += sum5;
1919dbdd7285SBarry Smith     y[11 * i + 5] += sum6;
1920dbdd7285SBarry Smith     y[11 * i + 6] += sum7;
1921dbdd7285SBarry Smith     y[11 * i + 7] += sum8;
1922dbdd7285SBarry Smith     y[11 * i + 8] += sum9;
1923dbdd7285SBarry Smith     y[11 * i + 9] += sum10;
1924dbdd7285SBarry Smith     y[11 * i + 10] += sum11;
1925dbdd7285SBarry Smith   }
1926dbdd7285SBarry Smith 
19279566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1930dbdd7285SBarry Smith   PetscFunctionReturn(0);
1931dbdd7285SBarry Smith }
1932dbdd7285SBarry Smith 
1933*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1934*d71ae5a4SJacob Faibussowitsch {
1935dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1936dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1937d2840e09SBarry Smith   const PetscScalar *x, *v;
1938d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1939d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1940d2840e09SBarry Smith   PetscInt           n, i;
1941dbdd7285SBarry Smith 
1942dbdd7285SBarry Smith   PetscFunctionBegin;
19439566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
19449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19459566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1946dbdd7285SBarry Smith 
1947dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1948dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1949dbdd7285SBarry Smith     v       = a->a + a->i[i];
1950dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1951dbdd7285SBarry Smith     alpha1  = x[11 * i];
1952dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1953dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1954dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1955dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1956dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1957dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1958dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1959dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1960dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1961dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1962dbdd7285SBarry Smith     while (n-- > 0) {
1963dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1964dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1965dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1966dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1967dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1968dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1969dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1970dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1971dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1972dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1973dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19749371c9d4SSatish Balay       idx++;
19759371c9d4SSatish Balay       v++;
1976dbdd7285SBarry Smith     }
1977dbdd7285SBarry Smith   }
19789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1981dbdd7285SBarry Smith   PetscFunctionReturn(0);
1982dbdd7285SBarry Smith }
1983dbdd7285SBarry Smith 
1984*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1985*d71ae5a4SJacob Faibussowitsch {
1986dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1987dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1988d2840e09SBarry Smith   const PetscScalar *x, *v;
1989d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1990d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1991d2840e09SBarry Smith   PetscInt           n, i;
1992dbdd7285SBarry Smith 
1993dbdd7285SBarry Smith   PetscFunctionBegin;
19949566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
19959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1997dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1998dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1999dbdd7285SBarry Smith     v       = a->a + a->i[i];
2000dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
2001dbdd7285SBarry Smith     alpha1  = x[11 * i];
2002dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
2003dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
2004dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
2005dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
2006dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
2007dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
2008dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
2009dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
2010dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
2011dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
2012dbdd7285SBarry Smith     while (n-- > 0) {
2013dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
2014dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
2015dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
2016dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
2017dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
2018dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
2019dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
2020dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
2021dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
2022dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
2023dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
20249371c9d4SSatish Balay       idx++;
20259371c9d4SSatish Balay       v++;
2026dbdd7285SBarry Smith     }
2027dbdd7285SBarry Smith   }
20289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
20299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
20309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2031dbdd7285SBarry Smith   PetscFunctionReturn(0);
2032dbdd7285SBarry Smith }
2033dbdd7285SBarry Smith 
2034dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2035*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2036*d71ae5a4SJacob Faibussowitsch {
20372f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
20382f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2039d2840e09SBarry Smith   const PetscScalar *x, *v;
2040d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20412f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2042d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2043d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
20442f7816d4SBarry Smith 
20452f7816d4SBarry Smith   PetscFunctionBegin;
20469566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
20479566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
20482f7816d4SBarry Smith   idx = a->j;
20492f7816d4SBarry Smith   v   = a->a;
20502f7816d4SBarry Smith   ii  = a->i;
20512f7816d4SBarry Smith 
20522f7816d4SBarry Smith   for (i = 0; i < m; i++) {
20532f7816d4SBarry Smith     jrow  = ii[i];
20542f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
20552f7816d4SBarry Smith     sum1  = 0.0;
20562f7816d4SBarry Smith     sum2  = 0.0;
20572f7816d4SBarry Smith     sum3  = 0.0;
20582f7816d4SBarry Smith     sum4  = 0.0;
20592f7816d4SBarry Smith     sum5  = 0.0;
20602f7816d4SBarry Smith     sum6  = 0.0;
20612f7816d4SBarry Smith     sum7  = 0.0;
20622f7816d4SBarry Smith     sum8  = 0.0;
20632f7816d4SBarry Smith     sum9  = 0.0;
20642f7816d4SBarry Smith     sum10 = 0.0;
20652f7816d4SBarry Smith     sum11 = 0.0;
20662f7816d4SBarry Smith     sum12 = 0.0;
20672f7816d4SBarry Smith     sum13 = 0.0;
20682f7816d4SBarry Smith     sum14 = 0.0;
20692f7816d4SBarry Smith     sum15 = 0.0;
20702f7816d4SBarry Smith     sum16 = 0.0;
207126fbe8dcSKarl Rupp 
2072b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
20732f7816d4SBarry Smith     for (j = 0; j < n; j++) {
20742f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
20752f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
20762f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
20772f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
20782f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
20792f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
20802f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
20812f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
20822f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
20832f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
20842f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
20852f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
20862f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
20872f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
20882f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
20892f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
20902f7816d4SBarry Smith       jrow++;
20912f7816d4SBarry Smith     }
20922f7816d4SBarry Smith     y[16 * i]      = sum1;
20932f7816d4SBarry Smith     y[16 * i + 1]  = sum2;
20942f7816d4SBarry Smith     y[16 * i + 2]  = sum3;
20952f7816d4SBarry Smith     y[16 * i + 3]  = sum4;
20962f7816d4SBarry Smith     y[16 * i + 4]  = sum5;
20972f7816d4SBarry Smith     y[16 * i + 5]  = sum6;
20982f7816d4SBarry Smith     y[16 * i + 6]  = sum7;
20992f7816d4SBarry Smith     y[16 * i + 7]  = sum8;
21002f7816d4SBarry Smith     y[16 * i + 8]  = sum9;
21012f7816d4SBarry Smith     y[16 * i + 9]  = sum10;
21022f7816d4SBarry Smith     y[16 * i + 10] = sum11;
21032f7816d4SBarry Smith     y[16 * i + 11] = sum12;
21042f7816d4SBarry Smith     y[16 * i + 12] = sum13;
21052f7816d4SBarry Smith     y[16 * i + 13] = sum14;
21062f7816d4SBarry Smith     y[16 * i + 14] = sum15;
21072f7816d4SBarry Smith     y[16 * i + 15] = sum16;
21082f7816d4SBarry Smith   }
21092f7816d4SBarry Smith 
21109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
21119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
21129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21132f7816d4SBarry Smith   PetscFunctionReturn(0);
21142f7816d4SBarry Smith }
21152f7816d4SBarry Smith 
2116*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2117*d71ae5a4SJacob Faibussowitsch {
21182f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
21192f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2120d2840e09SBarry Smith   const PetscScalar *x, *v;
2121d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
21222f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2123d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2124d2840e09SBarry Smith   PetscInt           n, i;
21252f7816d4SBarry Smith 
21262f7816d4SBarry Smith   PetscFunctionBegin;
21279566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
21289566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21299566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2130bfec09a0SHong Zhang 
21312f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2132bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2133bfec09a0SHong Zhang     v       = a->a + a->i[i];
21342f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
21352f7816d4SBarry Smith     alpha1  = x[16 * i];
21362f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
21372f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
21382f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
21392f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
21402f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
21412f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
21422f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
21432f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
21442f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
21452f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
21462f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
21472f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
21482f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
21492f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
21502f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
21512f7816d4SBarry Smith     while (n-- > 0) {
21522f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
21532f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
21542f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
21552f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
21562f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
21572f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
21582f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
21592f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
21602f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
21612f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
21622f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
21632f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
21642f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
21652f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
21662f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
21672f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
21689371c9d4SSatish Balay       idx++;
21699371c9d4SSatish Balay       v++;
21702f7816d4SBarry Smith     }
21712f7816d4SBarry Smith   }
21729566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
21739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
21749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21752f7816d4SBarry Smith   PetscFunctionReturn(0);
21762f7816d4SBarry Smith }
21772f7816d4SBarry Smith 
2178*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2179*d71ae5a4SJacob Faibussowitsch {
21802f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
21812f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2182d2840e09SBarry Smith   const PetscScalar *x, *v;
2183d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21842f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2185d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2186b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
21872f7816d4SBarry Smith 
21882f7816d4SBarry Smith   PetscFunctionBegin;
21899566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
21909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
21922f7816d4SBarry Smith   idx = a->j;
21932f7816d4SBarry Smith   v   = a->a;
21942f7816d4SBarry Smith   ii  = a->i;
21952f7816d4SBarry Smith 
21962f7816d4SBarry Smith   for (i = 0; i < m; i++) {
21972f7816d4SBarry Smith     jrow  = ii[i];
21982f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
21992f7816d4SBarry Smith     sum1  = 0.0;
22002f7816d4SBarry Smith     sum2  = 0.0;
22012f7816d4SBarry Smith     sum3  = 0.0;
22022f7816d4SBarry Smith     sum4  = 0.0;
22032f7816d4SBarry Smith     sum5  = 0.0;
22042f7816d4SBarry Smith     sum6  = 0.0;
22052f7816d4SBarry Smith     sum7  = 0.0;
22062f7816d4SBarry Smith     sum8  = 0.0;
22072f7816d4SBarry Smith     sum9  = 0.0;
22082f7816d4SBarry Smith     sum10 = 0.0;
22092f7816d4SBarry Smith     sum11 = 0.0;
22102f7816d4SBarry Smith     sum12 = 0.0;
22112f7816d4SBarry Smith     sum13 = 0.0;
22122f7816d4SBarry Smith     sum14 = 0.0;
22132f7816d4SBarry Smith     sum15 = 0.0;
22142f7816d4SBarry Smith     sum16 = 0.0;
22152f7816d4SBarry Smith     for (j = 0; j < n; j++) {
22162f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
22172f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
22182f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
22192f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
22202f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
22212f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
22222f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
22232f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
22242f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
22252f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
22262f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
22272f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
22282f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
22292f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
22302f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
22312f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
22322f7816d4SBarry Smith       jrow++;
22332f7816d4SBarry Smith     }
22342f7816d4SBarry Smith     y[16 * i] += sum1;
22352f7816d4SBarry Smith     y[16 * i + 1] += sum2;
22362f7816d4SBarry Smith     y[16 * i + 2] += sum3;
22372f7816d4SBarry Smith     y[16 * i + 3] += sum4;
22382f7816d4SBarry Smith     y[16 * i + 4] += sum5;
22392f7816d4SBarry Smith     y[16 * i + 5] += sum6;
22402f7816d4SBarry Smith     y[16 * i + 6] += sum7;
22412f7816d4SBarry Smith     y[16 * i + 7] += sum8;
22422f7816d4SBarry Smith     y[16 * i + 8] += sum9;
22432f7816d4SBarry Smith     y[16 * i + 9] += sum10;
22442f7816d4SBarry Smith     y[16 * i + 10] += sum11;
22452f7816d4SBarry Smith     y[16 * i + 11] += sum12;
22462f7816d4SBarry Smith     y[16 * i + 12] += sum13;
22472f7816d4SBarry Smith     y[16 * i + 13] += sum14;
22482f7816d4SBarry Smith     y[16 * i + 14] += sum15;
22492f7816d4SBarry Smith     y[16 * i + 15] += sum16;
22502f7816d4SBarry Smith   }
22512f7816d4SBarry Smith 
22529566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
22552f7816d4SBarry Smith   PetscFunctionReturn(0);
22562f7816d4SBarry Smith }
22572f7816d4SBarry Smith 
2258*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2259*d71ae5a4SJacob Faibussowitsch {
22602f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
22612f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2262d2840e09SBarry Smith   const PetscScalar *x, *v;
2263d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
22642f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2265d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2266d2840e09SBarry Smith   PetscInt           n, i;
22672f7816d4SBarry Smith 
22682f7816d4SBarry Smith   PetscFunctionBegin;
22699566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
22709566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22719566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
22722f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2273bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2274bfec09a0SHong Zhang     v       = a->a + a->i[i];
22752f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
22762f7816d4SBarry Smith     alpha1  = x[16 * i];
22772f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
22782f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
22792f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
22802f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
22812f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
22822f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
22832f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
22842f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
22852f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
22862f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
22872f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
22882f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
22892f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
22902f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
22912f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
22922f7816d4SBarry Smith     while (n-- > 0) {
22932f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
22942f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
22952f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
22962f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
22972f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
22982f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
22992f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
23002f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
23012f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
23022f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
23032f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
23042f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
23052f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
23062f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
23072f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
23082f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
23099371c9d4SSatish Balay       idx++;
23109371c9d4SSatish Balay       v++;
23112f7816d4SBarry Smith     }
23122f7816d4SBarry Smith   }
23139566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
23149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
23159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
231666ed3db0SBarry Smith   PetscFunctionReturn(0);
231766ed3db0SBarry Smith }
231866ed3db0SBarry Smith 
2319ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2320*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2321*d71ae5a4SJacob Faibussowitsch {
2322ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2323ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2324d2840e09SBarry Smith   const PetscScalar *x, *v;
2325d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2326ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2327d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2328d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
2329ed1c418cSBarry Smith 
2330ed1c418cSBarry Smith   PetscFunctionBegin;
23319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
23329566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2333ed1c418cSBarry Smith   idx = a->j;
2334ed1c418cSBarry Smith   v   = a->a;
2335ed1c418cSBarry Smith   ii  = a->i;
2336ed1c418cSBarry Smith 
2337ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2338ed1c418cSBarry Smith     jrow  = ii[i];
2339ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2340ed1c418cSBarry Smith     sum1  = 0.0;
2341ed1c418cSBarry Smith     sum2  = 0.0;
2342ed1c418cSBarry Smith     sum3  = 0.0;
2343ed1c418cSBarry Smith     sum4  = 0.0;
2344ed1c418cSBarry Smith     sum5  = 0.0;
2345ed1c418cSBarry Smith     sum6  = 0.0;
2346ed1c418cSBarry Smith     sum7  = 0.0;
2347ed1c418cSBarry Smith     sum8  = 0.0;
2348ed1c418cSBarry Smith     sum9  = 0.0;
2349ed1c418cSBarry Smith     sum10 = 0.0;
2350ed1c418cSBarry Smith     sum11 = 0.0;
2351ed1c418cSBarry Smith     sum12 = 0.0;
2352ed1c418cSBarry Smith     sum13 = 0.0;
2353ed1c418cSBarry Smith     sum14 = 0.0;
2354ed1c418cSBarry Smith     sum15 = 0.0;
2355ed1c418cSBarry Smith     sum16 = 0.0;
2356ed1c418cSBarry Smith     sum17 = 0.0;
2357ed1c418cSBarry Smith     sum18 = 0.0;
235826fbe8dcSKarl Rupp 
2359ed1c418cSBarry Smith     nonzerorow += (n > 0);
2360ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2361ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2362ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2363ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2364ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2365ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2366ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2367ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2368ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2369ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2370ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2371ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2372ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2373ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2374ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2375ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2376ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2377ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2378ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2379ed1c418cSBarry Smith       jrow++;
2380ed1c418cSBarry Smith     }
2381ed1c418cSBarry Smith     y[18 * i]      = sum1;
2382ed1c418cSBarry Smith     y[18 * i + 1]  = sum2;
2383ed1c418cSBarry Smith     y[18 * i + 2]  = sum3;
2384ed1c418cSBarry Smith     y[18 * i + 3]  = sum4;
2385ed1c418cSBarry Smith     y[18 * i + 4]  = sum5;
2386ed1c418cSBarry Smith     y[18 * i + 5]  = sum6;
2387ed1c418cSBarry Smith     y[18 * i + 6]  = sum7;
2388ed1c418cSBarry Smith     y[18 * i + 7]  = sum8;
2389ed1c418cSBarry Smith     y[18 * i + 8]  = sum9;
2390ed1c418cSBarry Smith     y[18 * i + 9]  = sum10;
2391ed1c418cSBarry Smith     y[18 * i + 10] = sum11;
2392ed1c418cSBarry Smith     y[18 * i + 11] = sum12;
2393ed1c418cSBarry Smith     y[18 * i + 12] = sum13;
2394ed1c418cSBarry Smith     y[18 * i + 13] = sum14;
2395ed1c418cSBarry Smith     y[18 * i + 14] = sum15;
2396ed1c418cSBarry Smith     y[18 * i + 15] = sum16;
2397ed1c418cSBarry Smith     y[18 * i + 16] = sum17;
2398ed1c418cSBarry Smith     y[18 * i + 17] = sum18;
2399ed1c418cSBarry Smith   }
2400ed1c418cSBarry Smith 
24019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
24029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2404ed1c418cSBarry Smith   PetscFunctionReturn(0);
2405ed1c418cSBarry Smith }
2406ed1c418cSBarry Smith 
2407*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2408*d71ae5a4SJacob Faibussowitsch {
2409ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2410ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2411d2840e09SBarry Smith   const PetscScalar *x, *v;
2412d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2413ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2414d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2415d2840e09SBarry Smith   PetscInt           n, i;
2416ed1c418cSBarry Smith 
2417ed1c418cSBarry Smith   PetscFunctionBegin;
24189566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
24199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
24209566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2421ed1c418cSBarry Smith 
2422ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2423ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2424ed1c418cSBarry Smith     v       = a->a + a->i[i];
2425ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2426ed1c418cSBarry Smith     alpha1  = x[18 * i];
2427ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2428ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2429ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2430ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2431ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2432ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2433ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2434ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2435ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2436ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2437ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2438ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2439ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2440ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2441ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2442ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2443ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2444ed1c418cSBarry Smith     while (n-- > 0) {
2445ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2446ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2447ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2448ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2449ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2450ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2451ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2452ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2453ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2454ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2455ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2456ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2457ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2458ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2459ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2460ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2461ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2462ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
24639371c9d4SSatish Balay       idx++;
24649371c9d4SSatish Balay       v++;
2465ed1c418cSBarry Smith     }
2466ed1c418cSBarry Smith   }
24679566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2470ed1c418cSBarry Smith   PetscFunctionReturn(0);
2471ed1c418cSBarry Smith }
2472ed1c418cSBarry Smith 
2473*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2474*d71ae5a4SJacob Faibussowitsch {
2475ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2476ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2477d2840e09SBarry Smith   const PetscScalar *x, *v;
2478d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2479ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2480d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2481ed1c418cSBarry Smith   PetscInt           n, i, jrow, j;
2482ed1c418cSBarry Smith 
2483ed1c418cSBarry Smith   PetscFunctionBegin;
24849566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
24859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
24869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2487ed1c418cSBarry Smith   idx = a->j;
2488ed1c418cSBarry Smith   v   = a->a;
2489ed1c418cSBarry Smith   ii  = a->i;
2490ed1c418cSBarry Smith 
2491ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2492ed1c418cSBarry Smith     jrow  = ii[i];
2493ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2494ed1c418cSBarry Smith     sum1  = 0.0;
2495ed1c418cSBarry Smith     sum2  = 0.0;
2496ed1c418cSBarry Smith     sum3  = 0.0;
2497ed1c418cSBarry Smith     sum4  = 0.0;
2498ed1c418cSBarry Smith     sum5  = 0.0;
2499ed1c418cSBarry Smith     sum6  = 0.0;
2500ed1c418cSBarry Smith     sum7  = 0.0;
2501ed1c418cSBarry Smith     sum8  = 0.0;
2502ed1c418cSBarry Smith     sum9  = 0.0;
2503ed1c418cSBarry Smith     sum10 = 0.0;
2504ed1c418cSBarry Smith     sum11 = 0.0;
2505ed1c418cSBarry Smith     sum12 = 0.0;
2506ed1c418cSBarry Smith     sum13 = 0.0;
2507ed1c418cSBarry Smith     sum14 = 0.0;
2508ed1c418cSBarry Smith     sum15 = 0.0;
2509ed1c418cSBarry Smith     sum16 = 0.0;
2510ed1c418cSBarry Smith     sum17 = 0.0;
2511ed1c418cSBarry Smith     sum18 = 0.0;
2512ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2513ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2514ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2515ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2516ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2517ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2518ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2519ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2520ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2521ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2522ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2523ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2524ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2525ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2526ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2527ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2528ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2529ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2530ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2531ed1c418cSBarry Smith       jrow++;
2532ed1c418cSBarry Smith     }
2533ed1c418cSBarry Smith     y[18 * i] += sum1;
2534ed1c418cSBarry Smith     y[18 * i + 1] += sum2;
2535ed1c418cSBarry Smith     y[18 * i + 2] += sum3;
2536ed1c418cSBarry Smith     y[18 * i + 3] += sum4;
2537ed1c418cSBarry Smith     y[18 * i + 4] += sum5;
2538ed1c418cSBarry Smith     y[18 * i + 5] += sum6;
2539ed1c418cSBarry Smith     y[18 * i + 6] += sum7;
2540ed1c418cSBarry Smith     y[18 * i + 7] += sum8;
2541ed1c418cSBarry Smith     y[18 * i + 8] += sum9;
2542ed1c418cSBarry Smith     y[18 * i + 9] += sum10;
2543ed1c418cSBarry Smith     y[18 * i + 10] += sum11;
2544ed1c418cSBarry Smith     y[18 * i + 11] += sum12;
2545ed1c418cSBarry Smith     y[18 * i + 12] += sum13;
2546ed1c418cSBarry Smith     y[18 * i + 13] += sum14;
2547ed1c418cSBarry Smith     y[18 * i + 14] += sum15;
2548ed1c418cSBarry Smith     y[18 * i + 15] += sum16;
2549ed1c418cSBarry Smith     y[18 * i + 16] += sum17;
2550ed1c418cSBarry Smith     y[18 * i + 17] += sum18;
2551ed1c418cSBarry Smith   }
2552ed1c418cSBarry Smith 
25539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
25549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2556ed1c418cSBarry Smith   PetscFunctionReturn(0);
2557ed1c418cSBarry Smith }
2558ed1c418cSBarry Smith 
2559*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2560*d71ae5a4SJacob Faibussowitsch {
2561ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2562ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2563d2840e09SBarry Smith   const PetscScalar *x, *v;
2564d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2565ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2566d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2567d2840e09SBarry Smith   PetscInt           n, i;
2568ed1c418cSBarry Smith 
2569ed1c418cSBarry Smith   PetscFunctionBegin;
25709566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
25719566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2573ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2574ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2575ed1c418cSBarry Smith     v       = a->a + a->i[i];
2576ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2577ed1c418cSBarry Smith     alpha1  = x[18 * i];
2578ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2579ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2580ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2581ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2582ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2583ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2584ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2585ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2586ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2587ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2588ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2589ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2590ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2591ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2592ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2593ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2594ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2595ed1c418cSBarry Smith     while (n-- > 0) {
2596ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2597ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2598ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2599ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2600ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2601ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2602ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2603ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2604ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2605ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2606ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2607ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2608ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2609ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2610ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2611ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2612ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2613ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
26149371c9d4SSatish Balay       idx++;
26159371c9d4SSatish Balay       v++;
2616ed1c418cSBarry Smith     }
2617ed1c418cSBarry Smith   }
26189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
26199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2621ed1c418cSBarry Smith   PetscFunctionReturn(0);
2622ed1c418cSBarry Smith }
2623ed1c418cSBarry Smith 
2624*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2625*d71ae5a4SJacob Faibussowitsch {
26266861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26276861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26286861f193SBarry Smith   const PetscScalar *x, *v;
26296861f193SBarry Smith   PetscScalar       *y, *sums;
26306861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26316861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
26326861f193SBarry Smith 
26336861f193SBarry Smith   PetscFunctionBegin;
26349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26359566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
26369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
26376861f193SBarry Smith   idx = a->j;
26386861f193SBarry Smith   v   = a->a;
26396861f193SBarry Smith   ii  = a->i;
26406861f193SBarry Smith 
26416861f193SBarry Smith   for (i = 0; i < m; i++) {
26426861f193SBarry Smith     jrow = ii[i];
26436861f193SBarry Smith     n    = ii[i + 1] - jrow;
26446861f193SBarry Smith     sums = y + dof * i;
26456861f193SBarry Smith     for (j = 0; j < n; j++) {
2646ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
26476861f193SBarry Smith       jrow++;
26486861f193SBarry Smith     }
26496861f193SBarry Smith   }
26506861f193SBarry Smith 
26519566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
26546861f193SBarry Smith   PetscFunctionReturn(0);
26556861f193SBarry Smith }
26566861f193SBarry Smith 
2657*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2658*d71ae5a4SJacob Faibussowitsch {
26596861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26606861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26616861f193SBarry Smith   const PetscScalar *x, *v;
26626861f193SBarry Smith   PetscScalar       *y, *sums;
26636861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26646861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
26656861f193SBarry Smith 
26666861f193SBarry Smith   PetscFunctionBegin;
26679566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26689566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26699566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26706861f193SBarry Smith   idx = a->j;
26716861f193SBarry Smith   v   = a->a;
26726861f193SBarry Smith   ii  = a->i;
26736861f193SBarry Smith 
26746861f193SBarry Smith   for (i = 0; i < m; i++) {
26756861f193SBarry Smith     jrow = ii[i];
26766861f193SBarry Smith     n    = ii[i + 1] - jrow;
26776861f193SBarry Smith     sums = y + dof * i;
26786861f193SBarry Smith     for (j = 0; j < n; j++) {
2679ad540459SPierre Jolivet       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
26806861f193SBarry Smith       jrow++;
26816861f193SBarry Smith     }
26826861f193SBarry Smith   }
26836861f193SBarry Smith 
26849566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26876861f193SBarry Smith   PetscFunctionReturn(0);
26886861f193SBarry Smith }
26896861f193SBarry Smith 
2690*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2691*d71ae5a4SJacob Faibussowitsch {
26926861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26936861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26946861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26956861f193SBarry Smith   PetscScalar       *y;
26966861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26976861f193SBarry Smith   PetscInt           n, i, k;
26986861f193SBarry Smith 
26996861f193SBarry Smith   PetscFunctionBegin;
27009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
27019566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
27029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
27036861f193SBarry Smith   for (i = 0; i < m; i++) {
27046861f193SBarry Smith     idx   = a->j + a->i[i];
27056861f193SBarry Smith     v     = a->a + a->i[i];
27066861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
27076861f193SBarry Smith     alpha = x + dof * i;
27086861f193SBarry Smith     while (n-- > 0) {
2709ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
27109371c9d4SSatish Balay       idx++;
27119371c9d4SSatish Balay       v++;
27126861f193SBarry Smith     }
27136861f193SBarry Smith   }
27149566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
27159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
27169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
27176861f193SBarry Smith   PetscFunctionReturn(0);
27186861f193SBarry Smith }
27196861f193SBarry Smith 
2720*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2721*d71ae5a4SJacob Faibussowitsch {
27226861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
27236861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
27246861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
27256861f193SBarry Smith   PetscScalar       *y;
27266861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
27276861f193SBarry Smith   PetscInt           n, i, k;
27286861f193SBarry Smith 
27296861f193SBarry Smith   PetscFunctionBegin;
27309566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
27319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
27329566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
27336861f193SBarry Smith   for (i = 0; i < m; i++) {
27346861f193SBarry Smith     idx   = a->j + a->i[i];
27356861f193SBarry Smith     v     = a->a + a->i[i];
27366861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
27376861f193SBarry Smith     alpha = x + dof * i;
27386861f193SBarry Smith     while (n-- > 0) {
2739ad540459SPierre Jolivet       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
27409371c9d4SSatish Balay       idx++;
27419371c9d4SSatish Balay       v++;
27426861f193SBarry Smith     }
27436861f193SBarry Smith   }
27449566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
27459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
27469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
27476861f193SBarry Smith   PetscFunctionReturn(0);
27486861f193SBarry Smith }
27496861f193SBarry Smith 
2750f4a53059SBarry Smith /*===================================================================================*/
2751*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2752*d71ae5a4SJacob Faibussowitsch {
27534cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2754f4a53059SBarry Smith 
2755b24ad042SBarry Smith   PetscFunctionBegin;
2756f4a53059SBarry Smith   /* start the scatter */
27579566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27589566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
27599566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27609566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2761f4a53059SBarry Smith   PetscFunctionReturn(0);
2762f4a53059SBarry Smith }
2763f4a53059SBarry Smith 
2764*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2765*d71ae5a4SJacob Faibussowitsch {
27664cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2767b24ad042SBarry Smith 
27684cb9d9b8SBarry Smith   PetscFunctionBegin;
27699566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27709566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
27719566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27729566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27734cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27744cb9d9b8SBarry Smith }
27754cb9d9b8SBarry Smith 
2776*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2777*d71ae5a4SJacob Faibussowitsch {
27784cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
27794cb9d9b8SBarry Smith 
2780b24ad042SBarry Smith   PetscFunctionBegin;
27814cb9d9b8SBarry Smith   /* start the scatter */
27829566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27839566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
27849566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27859566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
27864cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27874cb9d9b8SBarry Smith }
27884cb9d9b8SBarry Smith 
2789*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2790*d71ae5a4SJacob Faibussowitsch {
27914cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2792b24ad042SBarry Smith 
27934cb9d9b8SBarry Smith   PetscFunctionBegin;
27949566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27959566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
27969566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27979566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27984cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27994cb9d9b8SBarry Smith }
28004cb9d9b8SBarry Smith 
280195ddefa2SHong Zhang /* ----------------------------------------------------------------*/
2802*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2803*d71ae5a4SJacob Faibussowitsch {
28044222ddf1SHong Zhang   Mat_Product *product = C->product;
28054222ddf1SHong Zhang 
28064222ddf1SHong Zhang   PetscFunctionBegin;
28074222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
28084222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
280998921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
28104222ddf1SHong Zhang   PetscFunctionReturn(0);
28114222ddf1SHong Zhang }
28124222ddf1SHong Zhang 
2813*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2814*d71ae5a4SJacob Faibussowitsch {
28154222ddf1SHong Zhang   Mat_Product *product = C->product;
28164222ddf1SHong Zhang   PetscBool    flg     = PETSC_FALSE;
28174222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
28184222ddf1SHong Zhang   PetscInt     alg = 1; /* set default algorithm */
28194222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
28204222ddf1SHong Zhang   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
28214222ddf1SHong Zhang   PetscInt    nalg        = 4;
28224222ddf1SHong Zhang #else
28234222ddf1SHong Zhang   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
28244222ddf1SHong Zhang   PetscInt    nalg        = 5;
28254222ddf1SHong Zhang #endif
28264222ddf1SHong Zhang 
28274222ddf1SHong Zhang   PetscFunctionBegin;
282808401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
28294222ddf1SHong Zhang 
28304222ddf1SHong Zhang   /* PtAP */
28314222ddf1SHong Zhang   /* Check matrix local sizes */
28329371c9d4SSatish Balay   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
28339371c9d4SSatish Balay              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
28349371c9d4SSatish Balay   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
28359371c9d4SSatish Balay              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
28364222ddf1SHong Zhang 
28374222ddf1SHong Zhang   /* Set the default algorithm */
28389566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
283948a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
28404222ddf1SHong Zhang 
28414222ddf1SHong Zhang   /* Get runtime option */
2842d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
28439566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
284448a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2845d0609cedSBarry Smith   PetscOptionsEnd();
28464222ddf1SHong Zhang 
28479566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
28484222ddf1SHong Zhang   if (flg) {
28494222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28504222ddf1SHong Zhang     PetscFunctionReturn(0);
28514222ddf1SHong Zhang   }
28524222ddf1SHong Zhang 
28539566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
28544222ddf1SHong Zhang   if (flg) {
28554222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28564222ddf1SHong Zhang     PetscFunctionReturn(0);
28574222ddf1SHong Zhang   }
28584222ddf1SHong Zhang 
28594222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
28609566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
28619566063dSJacob Faibussowitsch   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
28629566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
28634222ddf1SHong Zhang   PetscFunctionReturn(0);
28644222ddf1SHong Zhang }
28654222ddf1SHong Zhang 
28664222ddf1SHong Zhang /* ----------------------------------------------------------------*/
2867*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
2868*d71ae5a4SJacob Faibussowitsch {
28690298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
28707ba1a0bfSKris Buschelman   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
28717ba1a0bfSKris Buschelman   Mat                P  = pp->AIJ;
28727ba1a0bfSKris Buschelman   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2873d2840e09SBarry Smith   PetscInt          *pti, *ptj, *ptJ;
28747ba1a0bfSKris Buschelman   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2875d2840e09SBarry Smith   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2876d2840e09SBarry Smith   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
28777ba1a0bfSKris Buschelman   MatScalar         *ca;
2878d2840e09SBarry Smith   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
28797ba1a0bfSKris Buschelman 
28807ba1a0bfSKris Buschelman   PetscFunctionBegin;
28817ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28829566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
28837ba1a0bfSKris Buschelman 
28847ba1a0bfSKris Buschelman   cn = pn * ppdof;
28857ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28867ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28879566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn + 1, &ci));
28887ba1a0bfSKris Buschelman   ci[0] = 0;
28897ba1a0bfSKris Buschelman 
28907ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
28929566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow, an));
28939566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow, cn));
28947ba1a0bfSKris Buschelman 
28957ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28967ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28977ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28989566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
28997ba1a0bfSKris Buschelman   current_space = free_space;
29007ba1a0bfSKris Buschelman 
29017ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29027ba1a0bfSKris Buschelman   for (i = 0; i < pn; i++) {
29037ba1a0bfSKris Buschelman     ptnzi = pti[i + 1] - pti[i];
29047ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29057ba1a0bfSKris Buschelman     for (dof = 0; dof < ppdof; dof++) {
29067ba1a0bfSKris Buschelman       ptanzi = 0;
29077ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29087ba1a0bfSKris Buschelman       for (j = 0; j < ptnzi; j++) {
29097ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29107ba1a0bfSKris Buschelman         arow = ptJ[j] * ppdof + dof;
29117ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29127ba1a0bfSKris Buschelman         anzj = ai[arow + 1] - ai[arow];
29137ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29147ba1a0bfSKris Buschelman         for (k = 0; k < anzj; k++) {
29157ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29167ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29177ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29187ba1a0bfSKris Buschelman           }
29197ba1a0bfSKris Buschelman         }
29207ba1a0bfSKris Buschelman       }
29217ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29227ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29237ba1a0bfSKris Buschelman       cnzi = 0;
29247ba1a0bfSKris Buschelman       for (j = 0; j < ptanzi; j++) {
29257ba1a0bfSKris Buschelman         /* Get offset within block of P */
29267ba1a0bfSKris Buschelman         pshift = *ptaj % ppdof;
29277ba1a0bfSKris Buschelman         /* Get block row of P */
29287ba1a0bfSKris Buschelman         prow = (*ptaj++) / ppdof; /* integer division */
29297ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29307ba1a0bfSKris Buschelman         pnzj = pi[prow + 1] - pi[prow];
29317ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29327ba1a0bfSKris Buschelman         for (k = 0; k < pnzj; k++) {
29337ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29347ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29357ba1a0bfSKris Buschelman           if (!denserow[pjj[k] * ppdof + pshift]) {
29367ba1a0bfSKris Buschelman             denserow[pjj[k] * ppdof + pshift] = -1;
29377ba1a0bfSKris Buschelman             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
29387ba1a0bfSKris Buschelman           }
29397ba1a0bfSKris Buschelman         }
29407ba1a0bfSKris Buschelman       }
29417ba1a0bfSKris Buschelman 
29427ba1a0bfSKris Buschelman       /* sort sparserow */
29439566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi, sparserow));
29447ba1a0bfSKris Buschelman 
29457ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29467ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
294748a46eb9SPierre Jolivet       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
29487ba1a0bfSKris Buschelman 
29497ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29509566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
295126fbe8dcSKarl Rupp 
29527ba1a0bfSKris Buschelman       current_space->array += cnzi;
29537ba1a0bfSKris Buschelman       current_space->local_used += cnzi;
29547ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29557ba1a0bfSKris Buschelman 
295626fbe8dcSKarl Rupp       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
295726fbe8dcSKarl Rupp       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
295826fbe8dcSKarl Rupp 
29597ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29607ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29617ba1a0bfSKris Buschelman       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
29627ba1a0bfSKris Buschelman     }
29637ba1a0bfSKris Buschelman   }
29647ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29657ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29667ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29679566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
29689566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
29699566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
29707ba1a0bfSKris Buschelman 
29717ba1a0bfSKris Buschelman   /* Allocate space for ca */
29729566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
29737ba1a0bfSKris Buschelman 
29747ba1a0bfSKris Buschelman   /* put together the new matrix */
29759566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
29769566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C, pp->dof));
29777ba1a0bfSKris Buschelman 
29787ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29797ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29804222ddf1SHong Zhang   c          = (Mat_SeqAIJ *)(C->data);
2981e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2982e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29837ba1a0bfSKris Buschelman   c->nonew   = 0;
298426fbe8dcSKarl Rupp 
29854222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29864222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29877ba1a0bfSKris Buschelman 
29887ba1a0bfSKris Buschelman   /* Clean up. */
29899566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
29907ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29917ba1a0bfSKris Buschelman }
29927ba1a0bfSKris Buschelman 
2993*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
2994*d71ae5a4SJacob Faibussowitsch {
29957ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29967ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
29977ba1a0bfSKris Buschelman   Mat              P  = pp->AIJ;
29987ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
29997ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
30007ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
3001a2ea699eSBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
3002d2840e09SBarry Smith   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
3003d2840e09SBarry Smith   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
3004d2840e09SBarry Smith   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
3005a2ea699eSBarry Smith   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
3006d2840e09SBarry Smith   MatScalar       *ca = c->a, *caj, *apa;
30077ba1a0bfSKris Buschelman 
30087ba1a0bfSKris Buschelman   PetscFunctionBegin;
30097ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30109566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
30117ba1a0bfSKris Buschelman 
30127ba1a0bfSKris Buschelman   /* Clear old values in C */
30139566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca, ci[cm]));
30147ba1a0bfSKris Buschelman 
30157ba1a0bfSKris Buschelman   for (i = 0; i < am; i++) {
30167ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30177ba1a0bfSKris Buschelman     anzi  = ai[i + 1] - ai[i];
30187ba1a0bfSKris Buschelman     apnzj = 0;
30197ba1a0bfSKris Buschelman     for (j = 0; j < anzi; j++) {
30207ba1a0bfSKris Buschelman       /* Get offset within block of P */
30217ba1a0bfSKris Buschelman       pshift = *aj % ppdof;
30227ba1a0bfSKris Buschelman       /* Get block row of P */
30237ba1a0bfSKris Buschelman       prow = *aj++ / ppdof; /* integer division */
30247ba1a0bfSKris Buschelman       pnzj = pi[prow + 1] - pi[prow];
30257ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30267ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30277ba1a0bfSKris Buschelman       for (k = 0; k < pnzj; k++) {
30287ba1a0bfSKris Buschelman         poffset = pjj[k] * ppdof + pshift;
30297ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30307ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30317ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30327ba1a0bfSKris Buschelman         }
30337ba1a0bfSKris Buschelman         apa[poffset] += (*aa) * paj[k];
30347ba1a0bfSKris Buschelman       }
30359566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * pnzj));
30367ba1a0bfSKris Buschelman       aa++;
30377ba1a0bfSKris Buschelman     }
30387ba1a0bfSKris Buschelman 
30397ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30407ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30419566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj, apj));
30427ba1a0bfSKris Buschelman 
30437ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30447ba1a0bfSKris Buschelman     prow    = i / ppdof; /* integer division */
30457ba1a0bfSKris Buschelman     pshift  = i % ppdof;
30467ba1a0bfSKris Buschelman     poffset = pi[prow];
30477ba1a0bfSKris Buschelman     pnzi    = pi[prow + 1] - poffset;
30487ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30497ba1a0bfSKris Buschelman     pJ = pj + poffset;
30507ba1a0bfSKris Buschelman     pA = pa + poffset;
30517ba1a0bfSKris Buschelman     for (j = 0; j < pnzi; j++) {
30527ba1a0bfSKris Buschelman       crow = (*pJ) * ppdof + pshift;
30537ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30547ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30557ba1a0bfSKris Buschelman       pJ++;
30567ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30577ba1a0bfSKris Buschelman       for (k = 0, nextap = 0; nextap < apnzj; k++) {
305826fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
30597ba1a0bfSKris Buschelman       }
30609566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * apnzj));
30617ba1a0bfSKris Buschelman       pA++;
30627ba1a0bfSKris Buschelman     }
30637ba1a0bfSKris Buschelman 
30647ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30657ba1a0bfSKris Buschelman     for (j = 0; j < apnzj; j++) {
30667ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30677ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30687ba1a0bfSKris Buschelman     }
30697ba1a0bfSKris Buschelman   }
30707ba1a0bfSKris Buschelman 
30717ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
30739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
30749566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa, apj, apjdense));
307595ddefa2SHong Zhang   PetscFunctionReturn(0);
307695ddefa2SHong Zhang }
30777ba1a0bfSKris Buschelman 
3078*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3079*d71ae5a4SJacob Faibussowitsch {
30804222ddf1SHong Zhang   Mat_Product *product = C->product;
30814222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30822121bac1SHong Zhang 
30832121bac1SHong Zhang   PetscFunctionBegin;
30849566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
30852121bac1SHong Zhang   PetscFunctionReturn(0);
30862121bac1SHong Zhang }
30872121bac1SHong Zhang 
3088*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C)
3089*d71ae5a4SJacob Faibussowitsch {
309095ddefa2SHong Zhang   PetscFunctionBegin;
30913e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
309295ddefa2SHong Zhang }
309395ddefa2SHong Zhang 
3094*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C)
3095*d71ae5a4SJacob Faibussowitsch {
309695ddefa2SHong Zhang   PetscFunctionBegin;
30973e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
30987ba1a0bfSKris Buschelman }
30995c65b9ecSFande Kong 
3100bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3101bc8e477aSFande Kong 
3102*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
3103*d71ae5a4SJacob Faibussowitsch {
3104bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3105bc8e477aSFande Kong 
3106bc8e477aSFande Kong   PetscFunctionBegin;
310734bcad68SFande Kong 
31089566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3109bc8e477aSFande Kong   PetscFunctionReturn(0);
3110bc8e477aSFande Kong }
3111bc8e477aSFande Kong 
31124222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3113bc8e477aSFande Kong 
3114*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
3115*d71ae5a4SJacob Faibussowitsch {
3116bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3117bc8e477aSFande Kong 
3118bc8e477aSFande Kong   PetscFunctionBegin;
31199566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
31204222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3121bc8e477aSFande Kong   PetscFunctionReturn(0);
3122bc8e477aSFande Kong }
3123bc8e477aSFande Kong 
3124bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3125bc8e477aSFande Kong 
3126*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
3127*d71ae5a4SJacob Faibussowitsch {
3128bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3129bc8e477aSFande Kong 
3130bc8e477aSFande Kong   PetscFunctionBegin;
313134bcad68SFande Kong 
31329566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3133bc8e477aSFande Kong   PetscFunctionReturn(0);
3134bc8e477aSFande Kong }
3135bc8e477aSFande Kong 
31364222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3137bc8e477aSFande Kong 
3138*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
3139*d71ae5a4SJacob Faibussowitsch {
3140bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3141bc8e477aSFande Kong 
3142bc8e477aSFande Kong   PetscFunctionBegin;
314334bcad68SFande Kong 
31449566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
31454222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3146bc8e477aSFande Kong   PetscFunctionReturn(0);
3147bc8e477aSFande Kong }
3148bc8e477aSFande Kong 
3149*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3150*d71ae5a4SJacob Faibussowitsch {
31514222ddf1SHong Zhang   Mat_Product *product = C->product;
31524222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
31534222ddf1SHong Zhang   PetscBool    flg;
3154bc8e477aSFande Kong 
3155bc8e477aSFande Kong   PetscFunctionBegin;
31569566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
31574222ddf1SHong Zhang   if (flg) {
31589566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
31594222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31604222ddf1SHong Zhang     PetscFunctionReturn(0);
3161bc8e477aSFande Kong   }
316234bcad68SFande Kong 
31639566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
31644222ddf1SHong Zhang   if (flg) {
31659566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
31664222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31674222ddf1SHong Zhang     PetscFunctionReturn(0);
31684222ddf1SHong Zhang   }
316934bcad68SFande Kong 
31704222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3171bc8e477aSFande Kong }
3172bc8e477aSFande Kong 
3173*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3174*d71ae5a4SJacob Faibussowitsch {
31759c4fc4c7SBarry Smith   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3176ceb03754SKris Buschelman   Mat          a   = b->AIJ, B;
31779c4fc4c7SBarry Smith   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
31780fd73130SBarry Smith   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3179ba8c8a56SBarry Smith   PetscInt    *cols;
3180ba8c8a56SBarry Smith   PetscScalar *vals;
31819c4fc4c7SBarry Smith 
31829c4fc4c7SBarry Smith   PetscFunctionBegin;
31839566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a, &m, &n));
31849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof * m, &ilen));
31859c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31869c4fc4c7SBarry Smith     nmax = PetscMax(nmax, aij->ilen[i]);
318726fbe8dcSKarl Rupp     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
31889c4fc4c7SBarry Smith   }
31899566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
31909566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
31919566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31929566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
31939566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax, &icols));
31959c4fc4c7SBarry Smith   ii = 0;
31969c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31979566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31980fd73130SBarry Smith     for (j = 0; j < dof; j++) {
319926fbe8dcSKarl Rupp       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
32009566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
32019c4fc4c7SBarry Smith       ii++;
32029c4fc4c7SBarry Smith     }
32039566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
32049c4fc4c7SBarry Smith   }
32059566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
32069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
32079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3208ceb03754SKris Buschelman 
3209511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32109566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
3211c3d102feSKris Buschelman   } else {
3212ceb03754SKris Buschelman     *newmat = B;
3213c3d102feSKris Buschelman   }
32149c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32159c4fc4c7SBarry Smith }
32169c4fc4c7SBarry Smith 
3217c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3218be1d678aSKris Buschelman 
3219*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3220*d71ae5a4SJacob Faibussowitsch {
32210fd73130SBarry Smith   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3222ceb03754SKris Buschelman   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
32230fd73130SBarry Smith   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
32240fd73130SBarry Smith   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
32250fd73130SBarry Smith   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
32260fd73130SBarry Smith   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
32270298fd71SBarry Smith   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
32280298fd71SBarry Smith   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
32290fd73130SBarry Smith   PetscInt     rstart, cstart, *garray, ii, k;
32300fd73130SBarry Smith   PetscScalar *vals, *ovals;
32310fd73130SBarry Smith 
32320fd73130SBarry Smith   PetscFunctionBegin;
32339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3234d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
32350fd73130SBarry Smith     nmax  = PetscMax(nmax, AIJ->ilen[i]);
32360fd73130SBarry Smith     onmax = PetscMax(onmax, OAIJ->ilen[i]);
32370fd73130SBarry Smith     for (j = 0; j < dof; j++) {
32380fd73130SBarry Smith       dnz[dof * i + j] = AIJ->ilen[i];
32390fd73130SBarry Smith       onz[dof * i + j] = OAIJ->ilen[i];
32400fd73130SBarry Smith     }
32410fd73130SBarry Smith   }
32429566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
32439566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
32449566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
32459566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
32469566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, dof));
32479566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz, onz));
32480fd73130SBarry Smith 
32499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3250d0f46423SBarry Smith   rstart = dof * maij->A->rmap->rstart;
3251d0f46423SBarry Smith   cstart = dof * maij->A->cmap->rstart;
32520fd73130SBarry Smith   garray = mpiaij->garray;
32530fd73130SBarry Smith 
32540fd73130SBarry Smith   ii = rstart;
3255d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
32569566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
32579566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
32580fd73130SBarry Smith     for (j = 0; j < dof; j++) {
3259ad540459SPierre Jolivet       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3260ad540459SPierre Jolivet       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
32619566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
32629566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
32630fd73130SBarry Smith       ii++;
32640fd73130SBarry Smith     }
32659566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
32669566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
32670fd73130SBarry Smith   }
32689566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols, oicols));
32690fd73130SBarry Smith 
32709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
32719566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3272ceb03754SKris Buschelman 
3273511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32747adad957SLisandro Dalcin     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32757adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
327626fbe8dcSKarl Rupp 
32779566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
327826fbe8dcSKarl Rupp 
32797adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3280c3d102feSKris Buschelman   } else {
3281ceb03754SKris Buschelman     *newmat = B;
3282c3d102feSKris Buschelman   }
32830fd73130SBarry Smith   PetscFunctionReturn(0);
32840fd73130SBarry Smith }
32850fd73130SBarry Smith 
3286*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
3287*d71ae5a4SJacob Faibussowitsch {
32889e516c8fSBarry Smith   Mat A;
32899e516c8fSBarry Smith 
32909e516c8fSBarry Smith   PetscFunctionBegin;
32919566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32929566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
32939566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
32949e516c8fSBarry Smith   PetscFunctionReturn(0);
32959e516c8fSBarry Smith }
32960fd73130SBarry Smith 
3297*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
3298*d71ae5a4SJacob Faibussowitsch {
3299ec626240SStefano Zampini   Mat A;
3300ec626240SStefano Zampini 
3301ec626240SStefano Zampini   PetscFunctionBegin;
33029566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
33039566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
33049566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3305ec626240SStefano Zampini   PetscFunctionReturn(0);
3306ec626240SStefano Zampini }
3307ec626240SStefano Zampini 
3308bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3309480dffcfSJed Brown /*@
33100bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33110bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
331211a5261eSBarry Smith   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
331311a5261eSBarry Smith   and `MATMPIAIJ` for distributed matrices.
33140bad9183SKris Buschelman 
3315ff585edeSJed Brown   Collective
3316ff585edeSJed Brown 
3317ff585edeSJed Brown   Input Parameters:
331811a5261eSBarry Smith + A - the `MATAIJ` matrix describing the action on blocks
3319ff585edeSJed Brown - dof - the block size (number of components per node)
3320ff585edeSJed Brown 
3321ff585edeSJed Brown   Output Parameter:
332211a5261eSBarry Smith . maij - the new `MATMAIJ` matrix
3323ff585edeSJed Brown 
33240bad9183SKris Buschelman   Operations provided:
332567b8a455SSatish Balay .vb
332611a5261eSBarry Smith     MatMult()
332711a5261eSBarry Smith     MatMultTranspose()
332811a5261eSBarry Smith     MatMultAdd()
332911a5261eSBarry Smith     MatMultTransposeAdd()
333011a5261eSBarry Smith     MatView()
333167b8a455SSatish Balay .ve
33320bad9183SKris Buschelman 
33330bad9183SKris Buschelman   Level: advanced
33340bad9183SKris Buschelman 
333511a5261eSBarry Smith .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3336ff585edeSJed Brown @*/
3337*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
3338*d71ae5a4SJacob Faibussowitsch {
3339b24ad042SBarry Smith   PetscInt  n;
334082b1193eSBarry Smith   Mat       B;
334190f67b22SBarry Smith   PetscBool flg;
3342fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3343fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3344fdc842d1SBarry Smith   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3345fdc842d1SBarry Smith #endif
334682b1193eSBarry Smith 
334782b1193eSBarry Smith   PetscFunctionBegin;
3348fdc842d1SBarry Smith   dof = PetscAbs(dof);
33499566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3350d72c5c08SBarry Smith 
335126fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
335226fbe8dcSKarl Rupp   else {
33539566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3354c248e2fdSStefano Zampini     /* propagate vec type */
33559566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B, A->defaultvectype));
33569566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
33579566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
33589566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
33599566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
33609566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
336126fbe8dcSKarl Rupp 
3362cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3363d72c5c08SBarry Smith 
336490f67b22SBarry Smith     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
336590f67b22SBarry Smith     if (flg) {
3366feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3367feb9fe23SJed Brown 
33689566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATSEQMAIJ));
336926fbe8dcSKarl Rupp 
33700298fd71SBarry Smith       B->ops->setup   = NULL;
33714cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33720fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
33734222ddf1SHong Zhang 
3374feb9fe23SJed Brown       b      = (Mat_SeqMAIJ *)B->data;
3375b9b97703SBarry Smith       b->dof = dof;
33764cb9d9b8SBarry Smith       b->AIJ = A;
337726fbe8dcSKarl Rupp 
3378d72c5c08SBarry Smith       if (dof == 2) {
3379cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3380cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3381cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3382cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3383bcc973b7SBarry Smith       } else if (dof == 3) {
3384bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3385bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3386bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3387bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3388bcc973b7SBarry Smith       } else if (dof == 4) {
3389bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3390bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3391bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3392bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3393f9fae5adSBarry Smith       } else if (dof == 5) {
3394f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3395f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3396f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3397f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33986cd98798SBarry Smith       } else if (dof == 6) {
33996cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34006cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34016cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34026cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3403ed8eea03SBarry Smith       } else if (dof == 7) {
3404ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3405ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3406ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3407ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
340866ed3db0SBarry Smith       } else if (dof == 8) {
340966ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
341066ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
341166ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
341266ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34132b692628SMatthew Knepley       } else if (dof == 9) {
34142b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34152b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34162b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34172b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3418b9cda22cSBarry Smith       } else if (dof == 10) {
3419b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3420b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3421b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3422b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3423dbdd7285SBarry Smith       } else if (dof == 11) {
3424dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3425dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3426dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3427dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34282f7816d4SBarry Smith       } else if (dof == 16) {
34292f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34302f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34312f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34322f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3433ed1c418cSBarry Smith       } else if (dof == 18) {
3434ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3435ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3436ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3437ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
343882b1193eSBarry Smith       } else {
34396861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34406861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34416861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34426861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
344382b1193eSBarry Smith       }
3444fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34459566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3446fdc842d1SBarry Smith #endif
34479566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
34489566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3449f4a53059SBarry Smith     } else {
3450f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3451feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3452f4a53059SBarry Smith       IS           from, to;
3453f4a53059SBarry Smith       Vec          gvec;
3454f4a53059SBarry Smith 
34559566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATMPIMAIJ));
345626fbe8dcSKarl Rupp 
34570298fd71SBarry Smith       B->ops->setup   = NULL;
3458d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34590fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
346026fbe8dcSKarl Rupp 
3461b9b97703SBarry Smith       b      = (Mat_MPIMAIJ *)B->data;
3462b9b97703SBarry Smith       b->dof = dof;
3463b9b97703SBarry Smith       b->A   = A;
346426fbe8dcSKarl Rupp 
34659566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
34669566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
34674cb9d9b8SBarry Smith 
34689566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec, &n));
34699566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
34709566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
34719566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w, dof));
34729566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w, VECSEQ));
3473f4a53059SBarry Smith 
3474f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
34759566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
34769566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3477f4a53059SBarry Smith 
3478f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
34799566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3480f4a53059SBarry Smith 
3481f4a53059SBarry Smith       /* generate the scatter context */
34829566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3483f4a53059SBarry Smith 
34849566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34859566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34869566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3487f4a53059SBarry Smith 
3488f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34894cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34904cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34914cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
349226fbe8dcSKarl Rupp 
3493fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34949566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3495fdc842d1SBarry Smith #endif
34969566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
34979566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3498f4a53059SBarry Smith     }
34997dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3500ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
35019566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3502fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3503fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3504fdc842d1SBarry Smith     {
3505fdc842d1SBarry Smith       PetscBool flg;
3506fdc842d1SBarry Smith       if (convert) {
35079566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
350848a46eb9SPierre Jolivet         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3509fdc842d1SBarry Smith       }
3510fdc842d1SBarry Smith     }
3511fdc842d1SBarry Smith #endif
3512cd3bbe55SBarry Smith     *maij = B;
3513d72c5c08SBarry Smith   }
351482b1193eSBarry Smith   PetscFunctionReturn(0);
351582b1193eSBarry Smith }
3516