xref: /petsc/src/mat/impls/maij/maij.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38db781477SPatrick Sanan .seealso: `MatCreateMAIJ()`
39ff585edeSJed Brown @*/
409371c9d4SSatish Balay PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) {
41ace3abfcSBarry Smith   PetscBool ismpimaij, isseqmaij;
42b9b97703SBarry Smith 
43b9b97703SBarry Smith   PetscFunctionBegin;
449566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
459566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
46b9b97703SBarry Smith   if (ismpimaij) {
47b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
48b9b97703SBarry Smith 
49b9b97703SBarry Smith     *B = b->A;
50b9b97703SBarry Smith   } else if (isseqmaij) {
51b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->AIJ;
54b9b97703SBarry Smith   } else {
55b9b97703SBarry Smith     *B = A;
56b9b97703SBarry Smith   }
57b9b97703SBarry Smith   PetscFunctionReturn(0);
58b9b97703SBarry Smith }
59b9b97703SBarry Smith 
60480dffcfSJed Brown /*@
61ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
62ff585edeSJed Brown 
633f9fe445SBarry Smith    Logically Collective
64ff585edeSJed Brown 
65d8d19677SJose E. Roman    Input Parameters:
66ff585edeSJed Brown +  A - the MAIJ matrix
67ff585edeSJed Brown -  dof - the block size for the new matrix
68ff585edeSJed Brown 
69ff585edeSJed Brown    Output Parameter:
70ff585edeSJed Brown .  B - the new MAIJ matrix
71ff585edeSJed Brown 
72ff585edeSJed Brown    Level: advanced
73ff585edeSJed Brown 
74db781477SPatrick Sanan .seealso: `MatCreateMAIJ()`
75ff585edeSJed Brown @*/
769371c9d4SSatish Balay PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) {
770298fd71SBarry Smith   Mat Aij = NULL;
78b9b97703SBarry Smith 
79b9b97703SBarry Smith   PetscFunctionBegin;
80c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A, dof, 2);
819566063dSJacob Faibussowitsch   PetscCall(MatMAIJGetAIJ(A, &Aij));
829566063dSJacob Faibussowitsch   PetscCall(MatCreateMAIJ(Aij, dof, B));
83b9b97703SBarry Smith   PetscFunctionReturn(0);
84b9b97703SBarry Smith }
85b9b97703SBarry Smith 
869371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqMAIJ(Mat A) {
874cb9d9b8SBarry Smith   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
8882b1193eSBarry Smith 
8982b1193eSBarry Smith   PetscFunctionBegin;
909566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
919566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
922e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
954cb9d9b8SBarry Smith   PetscFunctionReturn(0);
964cb9d9b8SBarry Smith }
974cb9d9b8SBarry Smith 
989371c9d4SSatish Balay PetscErrorCode MatSetUp_MAIJ(Mat A) {
99356c569eSBarry Smith   PetscFunctionBegin;
100ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
101356c569eSBarry Smith }
102356c569eSBarry Smith 
1039371c9d4SSatish Balay PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) {
1040fd73130SBarry Smith   Mat B;
1050fd73130SBarry Smith 
1060fd73130SBarry Smith   PetscFunctionBegin;
1079566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1089566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1099566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1100fd73130SBarry Smith   PetscFunctionReturn(0);
1110fd73130SBarry Smith }
1120fd73130SBarry Smith 
1139371c9d4SSatish Balay PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) {
1140fd73130SBarry Smith   Mat B;
1150fd73130SBarry Smith 
1160fd73130SBarry Smith   PetscFunctionBegin;
1179566063dSJacob Faibussowitsch   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
1189566063dSJacob Faibussowitsch   PetscCall(MatView(B, viewer));
1199566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
1200fd73130SBarry Smith   PetscFunctionReturn(0);
1210fd73130SBarry Smith }
1220fd73130SBarry Smith 
1239371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIMAIJ(Mat A) {
1244cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
1254cb9d9b8SBarry Smith 
1264cb9d9b8SBarry Smith   PetscFunctionBegin;
1279566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->AIJ));
1289566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->OAIJ));
1299566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->A));
1309566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->ctx));
1319566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->w));
1329566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1332e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
1349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
1359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
1369566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
137b9b97703SBarry Smith   PetscFunctionReturn(0);
138b9b97703SBarry Smith }
139b9b97703SBarry Smith 
1400bad9183SKris Buschelman /*MC
141fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1420bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1430bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1440bad9183SKris Buschelman 
1450bad9183SKris Buschelman   Operations provided:
14667b8a455SSatish Balay .vb
14767b8a455SSatish Balay     MatMult
14867b8a455SSatish Balay     MatMultTranspose
14967b8a455SSatish Balay     MatMultAdd
15067b8a455SSatish Balay     MatMultTransposeAdd
15167b8a455SSatish Balay .ve
1520bad9183SKris Buschelman 
1530bad9183SKris Buschelman   Level: advanced
1540bad9183SKris Buschelman 
155db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
1560bad9183SKris Buschelman M*/
1570bad9183SKris Buschelman 
1589371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) {
1594cb9d9b8SBarry Smith   Mat_MPIMAIJ *b;
16064b52464SHong Zhang   PetscMPIInt  size;
16182b1193eSBarry Smith 
16282b1193eSBarry Smith   PetscFunctionBegin;
1639566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A, &b));
164b0a32e0cSBarry Smith   A->data = (void *)b;
16526fbe8dcSKarl Rupp 
1669566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
16726fbe8dcSKarl Rupp 
168356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
169f4a53059SBarry Smith 
170f4259b30SLisandro Dalcin   b->AIJ  = NULL;
171cd3bbe55SBarry Smith   b->dof  = 0;
172f4259b30SLisandro Dalcin   b->OAIJ = NULL;
173f4259b30SLisandro Dalcin   b->ctx  = NULL;
174f4259b30SLisandro Dalcin   b->w    = NULL;
1759566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
17664b52464SHong Zhang   if (size == 1) {
1779566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
17864b52464SHong Zhang   } else {
1799566063dSJacob Faibussowitsch     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
18064b52464SHong Zhang   }
18132e7c8b0SBarry Smith   A->preallocated = PETSC_TRUE;
18232e7c8b0SBarry Smith   A->assembled    = PETSC_TRUE;
18382b1193eSBarry Smith   PetscFunctionReturn(0);
18482b1193eSBarry Smith }
18582b1193eSBarry Smith 
186cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1879371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
1884cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
189bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
190d2840e09SBarry Smith   const PetscScalar *x, *v;
191d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
192d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
193d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
19482b1193eSBarry Smith 
195bcc973b7SBarry Smith   PetscFunctionBegin;
1969566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
198bcc973b7SBarry Smith   idx = a->j;
199bcc973b7SBarry Smith   v   = a->a;
200bcc973b7SBarry Smith   ii  = a->i;
201bcc973b7SBarry Smith 
202bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
203bcc973b7SBarry Smith     jrow = ii[i];
204bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
205bcc973b7SBarry Smith     sum1 = 0.0;
206bcc973b7SBarry Smith     sum2 = 0.0;
20726fbe8dcSKarl Rupp 
208b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
209bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
210bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
211bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
212bcc973b7SBarry Smith       jrow++;
213bcc973b7SBarry Smith     }
214bcc973b7SBarry Smith     y[2 * i]     = sum1;
215bcc973b7SBarry Smith     y[2 * i + 1] = sum2;
216bcc973b7SBarry Smith   }
217bcc973b7SBarry Smith 
2189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
2199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
22182b1193eSBarry Smith   PetscFunctionReturn(0);
22282b1193eSBarry Smith }
223bcc973b7SBarry Smith 
2249371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) {
2254cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
226bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
227d2840e09SBarry Smith   const PetscScalar *x, *v;
228d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
229d2840e09SBarry Smith   PetscInt           n, i;
230d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
23182b1193eSBarry Smith 
232bcc973b7SBarry Smith   PetscFunctionBegin;
2339566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
2349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2359566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
236bfec09a0SHong Zhang 
237bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
238bfec09a0SHong Zhang     idx    = a->j + a->i[i];
239bfec09a0SHong Zhang     v      = a->a + a->i[i];
240bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
241bcc973b7SBarry Smith     alpha1 = x[2 * i];
242bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
24326fbe8dcSKarl Rupp     while (n-- > 0) {
24426fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
24526fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
2469371c9d4SSatish Balay       idx++;
2479371c9d4SSatish Balay       v++;
24826fbe8dcSKarl Rupp     }
249bcc973b7SBarry Smith   }
2509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
25382b1193eSBarry Smith   PetscFunctionReturn(0);
25482b1193eSBarry Smith }
255bcc973b7SBarry Smith 
2569371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2574cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
258bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
259d2840e09SBarry Smith   const PetscScalar *x, *v;
260d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
261b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
262d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26382b1193eSBarry Smith 
264bcc973b7SBarry Smith   PetscFunctionBegin;
2659566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
2669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
268bcc973b7SBarry Smith   idx = a->j;
269bcc973b7SBarry Smith   v   = a->a;
270bcc973b7SBarry Smith   ii  = a->i;
271bcc973b7SBarry Smith 
272bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
273bcc973b7SBarry Smith     jrow = ii[i];
274bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
275bcc973b7SBarry Smith     sum1 = 0.0;
276bcc973b7SBarry Smith     sum2 = 0.0;
277bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
278bcc973b7SBarry Smith       sum1 += v[jrow] * x[2 * idx[jrow]];
279bcc973b7SBarry Smith       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
280bcc973b7SBarry Smith       jrow++;
281bcc973b7SBarry Smith     }
282bcc973b7SBarry Smith     y[2 * i] += sum1;
283bcc973b7SBarry Smith     y[2 * i + 1] += sum2;
284bcc973b7SBarry Smith   }
285bcc973b7SBarry Smith 
2869566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
2879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
289bcc973b7SBarry Smith   PetscFunctionReturn(0);
29082b1193eSBarry Smith }
2919371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) {
2924cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
293bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
294d2840e09SBarry Smith   const PetscScalar *x, *v;
295d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2;
296d2840e09SBarry Smith   PetscInt           n, i;
297d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
29882b1193eSBarry Smith 
299bcc973b7SBarry Smith   PetscFunctionBegin;
3009566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
3019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
303bfec09a0SHong Zhang 
304bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
305bfec09a0SHong Zhang     idx    = a->j + a->i[i];
306bfec09a0SHong Zhang     v      = a->a + a->i[i];
307bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
308bcc973b7SBarry Smith     alpha1 = x[2 * i];
309bcc973b7SBarry Smith     alpha2 = x[2 * i + 1];
31026fbe8dcSKarl Rupp     while (n-- > 0) {
31126fbe8dcSKarl Rupp       y[2 * (*idx)] += alpha1 * (*v);
31226fbe8dcSKarl Rupp       y[2 * (*idx) + 1] += alpha2 * (*v);
3139371c9d4SSatish Balay       idx++;
3149371c9d4SSatish Balay       v++;
31526fbe8dcSKarl Rupp     }
316bcc973b7SBarry Smith   }
3179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(4.0 * a->nz));
3189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
320bcc973b7SBarry Smith   PetscFunctionReturn(0);
32182b1193eSBarry Smith }
322cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3239371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3244cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
325bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
326d2840e09SBarry Smith   const PetscScalar *x, *v;
327d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
328d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
329d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
33082b1193eSBarry Smith 
331bcc973b7SBarry Smith   PetscFunctionBegin;
3329566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3339566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
334bcc973b7SBarry Smith   idx = a->j;
335bcc973b7SBarry Smith   v   = a->a;
336bcc973b7SBarry Smith   ii  = a->i;
337bcc973b7SBarry Smith 
338bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
339bcc973b7SBarry Smith     jrow = ii[i];
340bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
341bcc973b7SBarry Smith     sum1 = 0.0;
342bcc973b7SBarry Smith     sum2 = 0.0;
343bcc973b7SBarry Smith     sum3 = 0.0;
34426fbe8dcSKarl Rupp 
345b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
346bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
347bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
348bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
349bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
350bcc973b7SBarry Smith       jrow++;
351bcc973b7SBarry Smith     }
352bcc973b7SBarry Smith     y[3 * i]     = sum1;
353bcc973b7SBarry Smith     y[3 * i + 1] = sum2;
354bcc973b7SBarry Smith     y[3 * i + 2] = sum3;
355bcc973b7SBarry Smith   }
356bcc973b7SBarry Smith 
3579566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
3589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
360bcc973b7SBarry Smith   PetscFunctionReturn(0);
361bcc973b7SBarry Smith }
362bcc973b7SBarry Smith 
3639371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) {
3644cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
365bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
366d2840e09SBarry Smith   const PetscScalar *x, *v;
367d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
368d2840e09SBarry Smith   PetscInt           n, i;
369d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
370bcc973b7SBarry Smith 
371bcc973b7SBarry Smith   PetscFunctionBegin;
3729566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
3739566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3749566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
375bfec09a0SHong Zhang 
376bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
377bfec09a0SHong Zhang     idx    = a->j + a->i[i];
378bfec09a0SHong Zhang     v      = a->a + a->i[i];
379bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
380bcc973b7SBarry Smith     alpha1 = x[3 * i];
381bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
382bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
383bcc973b7SBarry Smith     while (n-- > 0) {
384bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
385bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
386bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
3879371c9d4SSatish Balay       idx++;
3889371c9d4SSatish Balay       v++;
389bcc973b7SBarry Smith     }
390bcc973b7SBarry Smith   }
3919566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
3929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
394bcc973b7SBarry Smith   PetscFunctionReturn(0);
395bcc973b7SBarry Smith }
396bcc973b7SBarry Smith 
3979371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
3984cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
399bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
400d2840e09SBarry Smith   const PetscScalar *x, *v;
401d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3;
402b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
403d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
404bcc973b7SBarry Smith 
405bcc973b7SBarry Smith   PetscFunctionBegin;
4069566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4079566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4089566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
409bcc973b7SBarry Smith   idx = a->j;
410bcc973b7SBarry Smith   v   = a->a;
411bcc973b7SBarry Smith   ii  = a->i;
412bcc973b7SBarry Smith 
413bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
414bcc973b7SBarry Smith     jrow = ii[i];
415bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
416bcc973b7SBarry Smith     sum1 = 0.0;
417bcc973b7SBarry Smith     sum2 = 0.0;
418bcc973b7SBarry Smith     sum3 = 0.0;
419bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
420bcc973b7SBarry Smith       sum1 += v[jrow] * x[3 * idx[jrow]];
421bcc973b7SBarry Smith       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
422bcc973b7SBarry Smith       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
423bcc973b7SBarry Smith       jrow++;
424bcc973b7SBarry Smith     }
425bcc973b7SBarry Smith     y[3 * i] += sum1;
426bcc973b7SBarry Smith     y[3 * i + 1] += sum2;
427bcc973b7SBarry Smith     y[3 * i + 2] += sum3;
428bcc973b7SBarry Smith   }
429bcc973b7SBarry Smith 
4309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
433bcc973b7SBarry Smith   PetscFunctionReturn(0);
434bcc973b7SBarry Smith }
4359371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) {
4364cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
437bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
438d2840e09SBarry Smith   const PetscScalar *x, *v;
439d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3;
440d2840e09SBarry Smith   PetscInt           n, i;
441d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
442bcc973b7SBarry Smith 
443bcc973b7SBarry Smith   PetscFunctionBegin;
4449566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
4459566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
447bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
448bfec09a0SHong Zhang     idx    = a->j + a->i[i];
449bfec09a0SHong Zhang     v      = a->a + a->i[i];
450bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
451bcc973b7SBarry Smith     alpha1 = x[3 * i];
452bcc973b7SBarry Smith     alpha2 = x[3 * i + 1];
453bcc973b7SBarry Smith     alpha3 = x[3 * i + 2];
454bcc973b7SBarry Smith     while (n-- > 0) {
455bcc973b7SBarry Smith       y[3 * (*idx)] += alpha1 * (*v);
456bcc973b7SBarry Smith       y[3 * (*idx) + 1] += alpha2 * (*v);
457bcc973b7SBarry Smith       y[3 * (*idx) + 2] += alpha3 * (*v);
4589371c9d4SSatish Balay       idx++;
4599371c9d4SSatish Balay       v++;
460bcc973b7SBarry Smith     }
461bcc973b7SBarry Smith   }
4629566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(6.0 * a->nz));
4639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
465bcc973b7SBarry Smith   PetscFunctionReturn(0);
466bcc973b7SBarry Smith }
467bcc973b7SBarry Smith 
468bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4699371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
4704cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
471bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
472d2840e09SBarry Smith   const PetscScalar *x, *v;
473d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
474d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
475d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
476bcc973b7SBarry Smith 
477bcc973b7SBarry Smith   PetscFunctionBegin;
4789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
480bcc973b7SBarry Smith   idx = a->j;
481bcc973b7SBarry Smith   v   = a->a;
482bcc973b7SBarry Smith   ii  = a->i;
483bcc973b7SBarry Smith 
484bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
485bcc973b7SBarry Smith     jrow = ii[i];
486bcc973b7SBarry Smith     n    = ii[i + 1] - jrow;
487bcc973b7SBarry Smith     sum1 = 0.0;
488bcc973b7SBarry Smith     sum2 = 0.0;
489bcc973b7SBarry Smith     sum3 = 0.0;
490bcc973b7SBarry Smith     sum4 = 0.0;
491b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
492bcc973b7SBarry Smith     for (j = 0; j < n; j++) {
493bcc973b7SBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
494bcc973b7SBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
495bcc973b7SBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
496bcc973b7SBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
497bcc973b7SBarry Smith       jrow++;
498bcc973b7SBarry Smith     }
499bcc973b7SBarry Smith     y[4 * i]     = sum1;
500bcc973b7SBarry Smith     y[4 * i + 1] = sum2;
501bcc973b7SBarry Smith     y[4 * i + 2] = sum3;
502bcc973b7SBarry Smith     y[4 * i + 3] = sum4;
503bcc973b7SBarry Smith   }
504bcc973b7SBarry Smith 
5059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
5069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
508bcc973b7SBarry Smith   PetscFunctionReturn(0);
509bcc973b7SBarry Smith }
510bcc973b7SBarry Smith 
5119371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) {
5124cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
513bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
514d2840e09SBarry Smith   const PetscScalar *x, *v;
515d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
516d2840e09SBarry Smith   PetscInt           n, i;
517d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
518bcc973b7SBarry Smith 
519bcc973b7SBarry Smith   PetscFunctionBegin;
5209566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
5219566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5229566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
523bcc973b7SBarry Smith   for (i = 0; i < m; i++) {
524bfec09a0SHong Zhang     idx    = a->j + a->i[i];
525bfec09a0SHong Zhang     v      = a->a + a->i[i];
526bcc973b7SBarry Smith     n      = a->i[i + 1] - a->i[i];
527bcc973b7SBarry Smith     alpha1 = x[4 * i];
528bcc973b7SBarry Smith     alpha2 = x[4 * i + 1];
529bcc973b7SBarry Smith     alpha3 = x[4 * i + 2];
530bcc973b7SBarry Smith     alpha4 = x[4 * i + 3];
531bcc973b7SBarry Smith     while (n-- > 0) {
532bcc973b7SBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
533bcc973b7SBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
534bcc973b7SBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
535bcc973b7SBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
5369371c9d4SSatish Balay       idx++;
5379371c9d4SSatish Balay       v++;
538bcc973b7SBarry Smith     }
539bcc973b7SBarry Smith   }
5409566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
543bcc973b7SBarry Smith   PetscFunctionReturn(0);
544bcc973b7SBarry Smith }
545bcc973b7SBarry Smith 
5469371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
548f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
549d2840e09SBarry Smith   const PetscScalar *x, *v;
550d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4;
551b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
552d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
553f9fae5adSBarry Smith 
554f9fae5adSBarry Smith   PetscFunctionBegin;
5559566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
558f9fae5adSBarry Smith   idx = a->j;
559f9fae5adSBarry Smith   v   = a->a;
560f9fae5adSBarry Smith   ii  = a->i;
561f9fae5adSBarry Smith 
562f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
563f9fae5adSBarry Smith     jrow = ii[i];
564f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
565f9fae5adSBarry Smith     sum1 = 0.0;
566f9fae5adSBarry Smith     sum2 = 0.0;
567f9fae5adSBarry Smith     sum3 = 0.0;
568f9fae5adSBarry Smith     sum4 = 0.0;
569f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
570f9fae5adSBarry Smith       sum1 += v[jrow] * x[4 * idx[jrow]];
571f9fae5adSBarry Smith       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
572f9fae5adSBarry Smith       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
573f9fae5adSBarry Smith       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
574f9fae5adSBarry Smith       jrow++;
575f9fae5adSBarry Smith     }
576f9fae5adSBarry Smith     y[4 * i] += sum1;
577f9fae5adSBarry Smith     y[4 * i + 1] += sum2;
578f9fae5adSBarry Smith     y[4 * i + 2] += sum3;
579f9fae5adSBarry Smith     y[4 * i + 3] += sum4;
580f9fae5adSBarry Smith   }
581f9fae5adSBarry Smith 
5829566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
5839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
585f9fae5adSBarry Smith   PetscFunctionReturn(0);
586bcc973b7SBarry Smith }
5879371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) {
5884cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
589f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
590d2840e09SBarry Smith   const PetscScalar *x, *v;
591d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
592d2840e09SBarry Smith   PetscInt           n, i;
593d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
594f9fae5adSBarry Smith 
595f9fae5adSBarry Smith   PetscFunctionBegin;
5969566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
5979566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
599bfec09a0SHong Zhang 
600f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
601bfec09a0SHong Zhang     idx    = a->j + a->i[i];
602bfec09a0SHong Zhang     v      = a->a + a->i[i];
603f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
604f9fae5adSBarry Smith     alpha1 = x[4 * i];
605f9fae5adSBarry Smith     alpha2 = x[4 * i + 1];
606f9fae5adSBarry Smith     alpha3 = x[4 * i + 2];
607f9fae5adSBarry Smith     alpha4 = x[4 * i + 3];
608f9fae5adSBarry Smith     while (n-- > 0) {
609f9fae5adSBarry Smith       y[4 * (*idx)] += alpha1 * (*v);
610f9fae5adSBarry Smith       y[4 * (*idx) + 1] += alpha2 * (*v);
611f9fae5adSBarry Smith       y[4 * (*idx) + 2] += alpha3 * (*v);
612f9fae5adSBarry Smith       y[4 * (*idx) + 3] += alpha4 * (*v);
6139371c9d4SSatish Balay       idx++;
6149371c9d4SSatish Balay       v++;
615f9fae5adSBarry Smith     }
616f9fae5adSBarry Smith   }
6179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(8.0 * a->nz));
6189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
620f9fae5adSBarry Smith   PetscFunctionReturn(0);
621f9fae5adSBarry Smith }
622f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6236cd98798SBarry Smith 
6249371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6254cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
626f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
627d2840e09SBarry Smith   const PetscScalar *x, *v;
628d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
629d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
630d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
631f9fae5adSBarry Smith 
632f9fae5adSBarry Smith   PetscFunctionBegin;
6339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6349566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
635f9fae5adSBarry Smith   idx = a->j;
636f9fae5adSBarry Smith   v   = a->a;
637f9fae5adSBarry Smith   ii  = a->i;
638f9fae5adSBarry Smith 
639f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
640f9fae5adSBarry Smith     jrow = ii[i];
641f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
642f9fae5adSBarry Smith     sum1 = 0.0;
643f9fae5adSBarry Smith     sum2 = 0.0;
644f9fae5adSBarry Smith     sum3 = 0.0;
645f9fae5adSBarry Smith     sum4 = 0.0;
646f9fae5adSBarry Smith     sum5 = 0.0;
64726fbe8dcSKarl Rupp 
648b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
649f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
650f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
651f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
652f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
653f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
654f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
655f9fae5adSBarry Smith       jrow++;
656f9fae5adSBarry Smith     }
657f9fae5adSBarry Smith     y[5 * i]     = sum1;
658f9fae5adSBarry Smith     y[5 * i + 1] = sum2;
659f9fae5adSBarry Smith     y[5 * i + 2] = sum3;
660f9fae5adSBarry Smith     y[5 * i + 3] = sum4;
661f9fae5adSBarry Smith     y[5 * i + 4] = sum5;
662f9fae5adSBarry Smith   }
663f9fae5adSBarry Smith 
6649566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
6659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
667f9fae5adSBarry Smith   PetscFunctionReturn(0);
668f9fae5adSBarry Smith }
669f9fae5adSBarry Smith 
6709371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) {
6714cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
672f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
673d2840e09SBarry Smith   const PetscScalar *x, *v;
674d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
675d2840e09SBarry Smith   PetscInt           n, i;
676d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
677f9fae5adSBarry Smith 
678f9fae5adSBarry Smith   PetscFunctionBegin;
6799566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
6809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
682bfec09a0SHong Zhang 
683f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
684bfec09a0SHong Zhang     idx    = a->j + a->i[i];
685bfec09a0SHong Zhang     v      = a->a + a->i[i];
686f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
687f9fae5adSBarry Smith     alpha1 = x[5 * i];
688f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
689f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
690f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
691f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
692f9fae5adSBarry Smith     while (n-- > 0) {
693f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
694f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
695f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
696f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
697f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
6989371c9d4SSatish Balay       idx++;
6999371c9d4SSatish Balay       v++;
700f9fae5adSBarry Smith     }
701f9fae5adSBarry Smith   }
7029566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
705f9fae5adSBarry Smith   PetscFunctionReturn(0);
706f9fae5adSBarry Smith }
707f9fae5adSBarry Smith 
7089371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
7094cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
710f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
711d2840e09SBarry Smith   const PetscScalar *x, *v;
712d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
713b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
714d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
715f9fae5adSBarry Smith 
716f9fae5adSBarry Smith   PetscFunctionBegin;
7179566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
720f9fae5adSBarry Smith   idx = a->j;
721f9fae5adSBarry Smith   v   = a->a;
722f9fae5adSBarry Smith   ii  = a->i;
723f9fae5adSBarry Smith 
724f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
725f9fae5adSBarry Smith     jrow = ii[i];
726f9fae5adSBarry Smith     n    = ii[i + 1] - jrow;
727f9fae5adSBarry Smith     sum1 = 0.0;
728f9fae5adSBarry Smith     sum2 = 0.0;
729f9fae5adSBarry Smith     sum3 = 0.0;
730f9fae5adSBarry Smith     sum4 = 0.0;
731f9fae5adSBarry Smith     sum5 = 0.0;
732f9fae5adSBarry Smith     for (j = 0; j < n; j++) {
733f9fae5adSBarry Smith       sum1 += v[jrow] * x[5 * idx[jrow]];
734f9fae5adSBarry Smith       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
735f9fae5adSBarry Smith       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
736f9fae5adSBarry Smith       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
737f9fae5adSBarry Smith       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
738f9fae5adSBarry Smith       jrow++;
739f9fae5adSBarry Smith     }
740f9fae5adSBarry Smith     y[5 * i] += sum1;
741f9fae5adSBarry Smith     y[5 * i + 1] += sum2;
742f9fae5adSBarry Smith     y[5 * i + 2] += sum3;
743f9fae5adSBarry Smith     y[5 * i + 3] += sum4;
744f9fae5adSBarry Smith     y[5 * i + 4] += sum5;
745f9fae5adSBarry Smith   }
746f9fae5adSBarry Smith 
7479566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
750f9fae5adSBarry Smith   PetscFunctionReturn(0);
751f9fae5adSBarry Smith }
752f9fae5adSBarry Smith 
7539371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) {
7544cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
755f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
756d2840e09SBarry Smith   const PetscScalar *x, *v;
757d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
758d2840e09SBarry Smith   PetscInt           n, i;
759d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
760f9fae5adSBarry Smith 
761f9fae5adSBarry Smith   PetscFunctionBegin;
7629566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
7639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
7649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
765bfec09a0SHong Zhang 
766f9fae5adSBarry Smith   for (i = 0; i < m; i++) {
767bfec09a0SHong Zhang     idx    = a->j + a->i[i];
768bfec09a0SHong Zhang     v      = a->a + a->i[i];
769f9fae5adSBarry Smith     n      = a->i[i + 1] - a->i[i];
770f9fae5adSBarry Smith     alpha1 = x[5 * i];
771f9fae5adSBarry Smith     alpha2 = x[5 * i + 1];
772f9fae5adSBarry Smith     alpha3 = x[5 * i + 2];
773f9fae5adSBarry Smith     alpha4 = x[5 * i + 3];
774f9fae5adSBarry Smith     alpha5 = x[5 * i + 4];
775f9fae5adSBarry Smith     while (n-- > 0) {
776f9fae5adSBarry Smith       y[5 * (*idx)] += alpha1 * (*v);
777f9fae5adSBarry Smith       y[5 * (*idx) + 1] += alpha2 * (*v);
778f9fae5adSBarry Smith       y[5 * (*idx) + 2] += alpha3 * (*v);
779f9fae5adSBarry Smith       y[5 * (*idx) + 3] += alpha4 * (*v);
780f9fae5adSBarry Smith       y[5 * (*idx) + 4] += alpha5 * (*v);
7819371c9d4SSatish Balay       idx++;
7829371c9d4SSatish Balay       v++;
783f9fae5adSBarry Smith     }
784f9fae5adSBarry Smith   }
7859566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(10.0 * a->nz));
7869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
7879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
788f9fae5adSBarry Smith   PetscFunctionReturn(0);
789bcc973b7SBarry Smith }
790bcc973b7SBarry Smith 
7916cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7929371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
7936cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
7946cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
795d2840e09SBarry Smith   const PetscScalar *x, *v;
796d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
797d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
798d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
7996cd98798SBarry Smith 
8006cd98798SBarry Smith   PetscFunctionBegin;
8019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8029566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
8036cd98798SBarry Smith   idx = a->j;
8046cd98798SBarry Smith   v   = a->a;
8056cd98798SBarry Smith   ii  = a->i;
8066cd98798SBarry Smith 
8076cd98798SBarry Smith   for (i = 0; i < m; i++) {
8086cd98798SBarry Smith     jrow = ii[i];
8096cd98798SBarry Smith     n    = ii[i + 1] - jrow;
8106cd98798SBarry Smith     sum1 = 0.0;
8116cd98798SBarry Smith     sum2 = 0.0;
8126cd98798SBarry Smith     sum3 = 0.0;
8136cd98798SBarry Smith     sum4 = 0.0;
8146cd98798SBarry Smith     sum5 = 0.0;
8156cd98798SBarry Smith     sum6 = 0.0;
81626fbe8dcSKarl Rupp 
817b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
8186cd98798SBarry Smith     for (j = 0; j < n; j++) {
8196cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
8206cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
8216cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
8226cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
8236cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
8246cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
8256cd98798SBarry Smith       jrow++;
8266cd98798SBarry Smith     }
8276cd98798SBarry Smith     y[6 * i]     = sum1;
8286cd98798SBarry Smith     y[6 * i + 1] = sum2;
8296cd98798SBarry Smith     y[6 * i + 2] = sum3;
8306cd98798SBarry Smith     y[6 * i + 3] = sum4;
8316cd98798SBarry Smith     y[6 * i + 4] = sum5;
8326cd98798SBarry Smith     y[6 * i + 5] = sum6;
8336cd98798SBarry Smith   }
8346cd98798SBarry Smith 
8359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
8369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8386cd98798SBarry Smith   PetscFunctionReturn(0);
8396cd98798SBarry Smith }
8406cd98798SBarry Smith 
8419371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) {
8426cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8436cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
844d2840e09SBarry Smith   const PetscScalar *x, *v;
845d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
846d2840e09SBarry Smith   PetscInt           n, i;
847d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
8486cd98798SBarry Smith 
8496cd98798SBarry Smith   PetscFunctionBegin;
8509566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
8519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
853bfec09a0SHong Zhang 
8546cd98798SBarry Smith   for (i = 0; i < m; i++) {
855bfec09a0SHong Zhang     idx    = a->j + a->i[i];
856bfec09a0SHong Zhang     v      = a->a + a->i[i];
8576cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
8586cd98798SBarry Smith     alpha1 = x[6 * i];
8596cd98798SBarry Smith     alpha2 = x[6 * i + 1];
8606cd98798SBarry Smith     alpha3 = x[6 * i + 2];
8616cd98798SBarry Smith     alpha4 = x[6 * i + 3];
8626cd98798SBarry Smith     alpha5 = x[6 * i + 4];
8636cd98798SBarry Smith     alpha6 = x[6 * i + 5];
8646cd98798SBarry Smith     while (n-- > 0) {
8656cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
8666cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
8676cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
8686cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
8696cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
8706cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
8719371c9d4SSatish Balay       idx++;
8729371c9d4SSatish Balay       v++;
8736cd98798SBarry Smith     }
8746cd98798SBarry Smith   }
8759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
8769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
8779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
8786cd98798SBarry Smith   PetscFunctionReturn(0);
8796cd98798SBarry Smith }
8806cd98798SBarry Smith 
8819371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
8826cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
8836cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
884d2840e09SBarry Smith   const PetscScalar *x, *v;
885d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
886b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
887d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
8886cd98798SBarry Smith 
8896cd98798SBarry Smith   PetscFunctionBegin;
8909566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
8919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
8929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
8936cd98798SBarry Smith   idx = a->j;
8946cd98798SBarry Smith   v   = a->a;
8956cd98798SBarry Smith   ii  = a->i;
8966cd98798SBarry Smith 
8976cd98798SBarry Smith   for (i = 0; i < m; i++) {
8986cd98798SBarry Smith     jrow = ii[i];
8996cd98798SBarry Smith     n    = ii[i + 1] - jrow;
9006cd98798SBarry Smith     sum1 = 0.0;
9016cd98798SBarry Smith     sum2 = 0.0;
9026cd98798SBarry Smith     sum3 = 0.0;
9036cd98798SBarry Smith     sum4 = 0.0;
9046cd98798SBarry Smith     sum5 = 0.0;
9056cd98798SBarry Smith     sum6 = 0.0;
9066cd98798SBarry Smith     for (j = 0; j < n; j++) {
9076cd98798SBarry Smith       sum1 += v[jrow] * x[6 * idx[jrow]];
9086cd98798SBarry Smith       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
9096cd98798SBarry Smith       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
9106cd98798SBarry Smith       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
9116cd98798SBarry Smith       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
9126cd98798SBarry Smith       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
9136cd98798SBarry Smith       jrow++;
9146cd98798SBarry Smith     }
9156cd98798SBarry Smith     y[6 * i] += sum1;
9166cd98798SBarry Smith     y[6 * i + 1] += sum2;
9176cd98798SBarry Smith     y[6 * i + 2] += sum3;
9186cd98798SBarry Smith     y[6 * i + 3] += sum4;
9196cd98798SBarry Smith     y[6 * i + 4] += sum5;
9206cd98798SBarry Smith     y[6 * i + 5] += sum6;
9216cd98798SBarry Smith   }
9226cd98798SBarry Smith 
9239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9266cd98798SBarry Smith   PetscFunctionReturn(0);
9276cd98798SBarry Smith }
9286cd98798SBarry Smith 
9299371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) {
9306cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
9316cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
932d2840e09SBarry Smith   const PetscScalar *x, *v;
933d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
934d2840e09SBarry Smith   PetscInt           n, i;
935d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
9366cd98798SBarry Smith 
9376cd98798SBarry Smith   PetscFunctionBegin;
9389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
9399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
941bfec09a0SHong Zhang 
9426cd98798SBarry Smith   for (i = 0; i < m; i++) {
943bfec09a0SHong Zhang     idx    = a->j + a->i[i];
944bfec09a0SHong Zhang     v      = a->a + a->i[i];
9456cd98798SBarry Smith     n      = a->i[i + 1] - a->i[i];
9466cd98798SBarry Smith     alpha1 = x[6 * i];
9476cd98798SBarry Smith     alpha2 = x[6 * i + 1];
9486cd98798SBarry Smith     alpha3 = x[6 * i + 2];
9496cd98798SBarry Smith     alpha4 = x[6 * i + 3];
9506cd98798SBarry Smith     alpha5 = x[6 * i + 4];
9516cd98798SBarry Smith     alpha6 = x[6 * i + 5];
9526cd98798SBarry Smith     while (n-- > 0) {
9536cd98798SBarry Smith       y[6 * (*idx)] += alpha1 * (*v);
9546cd98798SBarry Smith       y[6 * (*idx) + 1] += alpha2 * (*v);
9556cd98798SBarry Smith       y[6 * (*idx) + 2] += alpha3 * (*v);
9566cd98798SBarry Smith       y[6 * (*idx) + 3] += alpha4 * (*v);
9576cd98798SBarry Smith       y[6 * (*idx) + 4] += alpha5 * (*v);
9586cd98798SBarry Smith       y[6 * (*idx) + 5] += alpha6 * (*v);
9599371c9d4SSatish Balay       idx++;
9609371c9d4SSatish Balay       v++;
9616cd98798SBarry Smith     }
9626cd98798SBarry Smith   }
9639566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(12.0 * a->nz));
9649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
9659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
9666cd98798SBarry Smith   PetscFunctionReturn(0);
9676cd98798SBarry Smith }
9686cd98798SBarry Smith 
96966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
9709371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
971ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
972ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
973d2840e09SBarry Smith   const PetscScalar *x, *v;
974d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
975d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
976d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
977ed8eea03SBarry Smith 
978ed8eea03SBarry Smith   PetscFunctionBegin;
9799566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
9809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
981ed8eea03SBarry Smith   idx = a->j;
982ed8eea03SBarry Smith   v   = a->a;
983ed8eea03SBarry Smith   ii  = a->i;
984ed8eea03SBarry Smith 
985ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
986ed8eea03SBarry Smith     jrow = ii[i];
987ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
988ed8eea03SBarry Smith     sum1 = 0.0;
989ed8eea03SBarry Smith     sum2 = 0.0;
990ed8eea03SBarry Smith     sum3 = 0.0;
991ed8eea03SBarry Smith     sum4 = 0.0;
992ed8eea03SBarry Smith     sum5 = 0.0;
993ed8eea03SBarry Smith     sum6 = 0.0;
994ed8eea03SBarry Smith     sum7 = 0.0;
99526fbe8dcSKarl Rupp 
996b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
997ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
998ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
999ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1000ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1001ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1002ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1003ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1004ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1005ed8eea03SBarry Smith       jrow++;
1006ed8eea03SBarry Smith     }
1007ed8eea03SBarry Smith     y[7 * i]     = sum1;
1008ed8eea03SBarry Smith     y[7 * i + 1] = sum2;
1009ed8eea03SBarry Smith     y[7 * i + 2] = sum3;
1010ed8eea03SBarry Smith     y[7 * i + 3] = sum4;
1011ed8eea03SBarry Smith     y[7 * i + 4] = sum5;
1012ed8eea03SBarry Smith     y[7 * i + 5] = sum6;
1013ed8eea03SBarry Smith     y[7 * i + 6] = sum7;
1014ed8eea03SBarry Smith   }
1015ed8eea03SBarry Smith 
10169566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
10179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1019ed8eea03SBarry Smith   PetscFunctionReturn(0);
1020ed8eea03SBarry Smith }
1021ed8eea03SBarry Smith 
10229371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) {
1023ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1024ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1025d2840e09SBarry Smith   const PetscScalar *x, *v;
1026d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1027d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1028d2840e09SBarry Smith   PetscInt           n, i;
1029ed8eea03SBarry Smith 
1030ed8eea03SBarry Smith   PetscFunctionBegin;
10319566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
10329566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10339566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1034ed8eea03SBarry Smith 
1035ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1036ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1037ed8eea03SBarry Smith     v      = a->a + a->i[i];
1038ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1039ed8eea03SBarry Smith     alpha1 = x[7 * i];
1040ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1041ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1042ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1043ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1044ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1045ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1046ed8eea03SBarry Smith     while (n-- > 0) {
1047ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1048ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1049ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1050ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1051ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1052ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1053ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
10549371c9d4SSatish Balay       idx++;
10559371c9d4SSatish Balay       v++;
1056ed8eea03SBarry Smith     }
1057ed8eea03SBarry Smith   }
10589566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
10599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
10609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1061ed8eea03SBarry Smith   PetscFunctionReturn(0);
1062ed8eea03SBarry Smith }
1063ed8eea03SBarry Smith 
10649371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1065ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1066ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1067d2840e09SBarry Smith   const PetscScalar *x, *v;
1068d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1069d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1070ed8eea03SBarry Smith   PetscInt           n, i, jrow, j;
1071ed8eea03SBarry Smith 
1072ed8eea03SBarry Smith   PetscFunctionBegin;
10739566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
10749566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
10759566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1076ed8eea03SBarry Smith   idx = a->j;
1077ed8eea03SBarry Smith   v   = a->a;
1078ed8eea03SBarry Smith   ii  = a->i;
1079ed8eea03SBarry Smith 
1080ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1081ed8eea03SBarry Smith     jrow = ii[i];
1082ed8eea03SBarry Smith     n    = ii[i + 1] - jrow;
1083ed8eea03SBarry Smith     sum1 = 0.0;
1084ed8eea03SBarry Smith     sum2 = 0.0;
1085ed8eea03SBarry Smith     sum3 = 0.0;
1086ed8eea03SBarry Smith     sum4 = 0.0;
1087ed8eea03SBarry Smith     sum5 = 0.0;
1088ed8eea03SBarry Smith     sum6 = 0.0;
1089ed8eea03SBarry Smith     sum7 = 0.0;
1090ed8eea03SBarry Smith     for (j = 0; j < n; j++) {
1091ed8eea03SBarry Smith       sum1 += v[jrow] * x[7 * idx[jrow]];
1092ed8eea03SBarry Smith       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1093ed8eea03SBarry Smith       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1094ed8eea03SBarry Smith       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1095ed8eea03SBarry Smith       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1096ed8eea03SBarry Smith       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1097ed8eea03SBarry Smith       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1098ed8eea03SBarry Smith       jrow++;
1099ed8eea03SBarry Smith     }
1100ed8eea03SBarry Smith     y[7 * i] += sum1;
1101ed8eea03SBarry Smith     y[7 * i + 1] += sum2;
1102ed8eea03SBarry Smith     y[7 * i + 2] += sum3;
1103ed8eea03SBarry Smith     y[7 * i + 3] += sum4;
1104ed8eea03SBarry Smith     y[7 * i + 4] += sum5;
1105ed8eea03SBarry Smith     y[7 * i + 5] += sum6;
1106ed8eea03SBarry Smith     y[7 * i + 6] += sum7;
1107ed8eea03SBarry Smith   }
1108ed8eea03SBarry Smith 
11099566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1112ed8eea03SBarry Smith   PetscFunctionReturn(0);
1113ed8eea03SBarry Smith }
1114ed8eea03SBarry Smith 
11159371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) {
1116ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1117ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1118d2840e09SBarry Smith   const PetscScalar *x, *v;
1119d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1120d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1121d2840e09SBarry Smith   PetscInt           n, i;
1122ed8eea03SBarry Smith 
1123ed8eea03SBarry Smith   PetscFunctionBegin;
11249566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
11259566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11269566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1127ed8eea03SBarry Smith   for (i = 0; i < m; i++) {
1128ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1129ed8eea03SBarry Smith     v      = a->a + a->i[i];
1130ed8eea03SBarry Smith     n      = a->i[i + 1] - a->i[i];
1131ed8eea03SBarry Smith     alpha1 = x[7 * i];
1132ed8eea03SBarry Smith     alpha2 = x[7 * i + 1];
1133ed8eea03SBarry Smith     alpha3 = x[7 * i + 2];
1134ed8eea03SBarry Smith     alpha4 = x[7 * i + 3];
1135ed8eea03SBarry Smith     alpha5 = x[7 * i + 4];
1136ed8eea03SBarry Smith     alpha6 = x[7 * i + 5];
1137ed8eea03SBarry Smith     alpha7 = x[7 * i + 6];
1138ed8eea03SBarry Smith     while (n-- > 0) {
1139ed8eea03SBarry Smith       y[7 * (*idx)] += alpha1 * (*v);
1140ed8eea03SBarry Smith       y[7 * (*idx) + 1] += alpha2 * (*v);
1141ed8eea03SBarry Smith       y[7 * (*idx) + 2] += alpha3 * (*v);
1142ed8eea03SBarry Smith       y[7 * (*idx) + 3] += alpha4 * (*v);
1143ed8eea03SBarry Smith       y[7 * (*idx) + 4] += alpha5 * (*v);
1144ed8eea03SBarry Smith       y[7 * (*idx) + 5] += alpha6 * (*v);
1145ed8eea03SBarry Smith       y[7 * (*idx) + 6] += alpha7 * (*v);
11469371c9d4SSatish Balay       idx++;
11479371c9d4SSatish Balay       v++;
1148ed8eea03SBarry Smith     }
1149ed8eea03SBarry Smith   }
11509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(14.0 * a->nz));
11519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1153ed8eea03SBarry Smith   PetscFunctionReturn(0);
1154ed8eea03SBarry Smith }
1155ed8eea03SBarry Smith 
11569371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
115766ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
115866ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1159d2840e09SBarry Smith   const PetscScalar *x, *v;
1160d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1161d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1162d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
116366ed3db0SBarry Smith 
116466ed3db0SBarry Smith   PetscFunctionBegin;
11659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
116766ed3db0SBarry Smith   idx = a->j;
116866ed3db0SBarry Smith   v   = a->a;
116966ed3db0SBarry Smith   ii  = a->i;
117066ed3db0SBarry Smith 
117166ed3db0SBarry Smith   for (i = 0; i < m; i++) {
117266ed3db0SBarry Smith     jrow = ii[i];
117366ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
117466ed3db0SBarry Smith     sum1 = 0.0;
117566ed3db0SBarry Smith     sum2 = 0.0;
117666ed3db0SBarry Smith     sum3 = 0.0;
117766ed3db0SBarry Smith     sum4 = 0.0;
117866ed3db0SBarry Smith     sum5 = 0.0;
117966ed3db0SBarry Smith     sum6 = 0.0;
118066ed3db0SBarry Smith     sum7 = 0.0;
118166ed3db0SBarry Smith     sum8 = 0.0;
118226fbe8dcSKarl Rupp 
1183b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
118466ed3db0SBarry Smith     for (j = 0; j < n; j++) {
118566ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
118666ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
118766ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
118866ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
118966ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
119066ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
119166ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
119266ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
119366ed3db0SBarry Smith       jrow++;
119466ed3db0SBarry Smith     }
119566ed3db0SBarry Smith     y[8 * i]     = sum1;
119666ed3db0SBarry Smith     y[8 * i + 1] = sum2;
119766ed3db0SBarry Smith     y[8 * i + 2] = sum3;
119866ed3db0SBarry Smith     y[8 * i + 3] = sum4;
119966ed3db0SBarry Smith     y[8 * i + 4] = sum5;
120066ed3db0SBarry Smith     y[8 * i + 5] = sum6;
120166ed3db0SBarry Smith     y[8 * i + 6] = sum7;
120266ed3db0SBarry Smith     y[8 * i + 7] = sum8;
120366ed3db0SBarry Smith   }
120466ed3db0SBarry Smith 
12059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
12069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
120866ed3db0SBarry Smith   PetscFunctionReturn(0);
120966ed3db0SBarry Smith }
121066ed3db0SBarry Smith 
12119371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) {
121266ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
121366ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1214d2840e09SBarry Smith   const PetscScalar *x, *v;
1215d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1216d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1217d2840e09SBarry Smith   PetscInt           n, i;
121866ed3db0SBarry Smith 
121966ed3db0SBarry Smith   PetscFunctionBegin;
12209566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
12219566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12229566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1223bfec09a0SHong Zhang 
122466ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1225bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1226bfec09a0SHong Zhang     v      = a->a + a->i[i];
122766ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
122866ed3db0SBarry Smith     alpha1 = x[8 * i];
122966ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
123066ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
123166ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
123266ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
123366ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
123466ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
123566ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
123666ed3db0SBarry Smith     while (n-- > 0) {
123766ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
123866ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
123966ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
124066ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
124166ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
124266ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
124366ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
124466ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
12459371c9d4SSatish Balay       idx++;
12469371c9d4SSatish Balay       v++;
124766ed3db0SBarry Smith     }
124866ed3db0SBarry Smith   }
12499566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
12509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
12519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
125266ed3db0SBarry Smith   PetscFunctionReturn(0);
125366ed3db0SBarry Smith }
125466ed3db0SBarry Smith 
12559371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
125666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
125766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1258d2840e09SBarry Smith   const PetscScalar *x, *v;
1259d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1261b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
126266ed3db0SBarry Smith 
126366ed3db0SBarry Smith   PetscFunctionBegin;
12649566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
12659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
12669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
126766ed3db0SBarry Smith   idx = a->j;
126866ed3db0SBarry Smith   v   = a->a;
126966ed3db0SBarry Smith   ii  = a->i;
127066ed3db0SBarry Smith 
127166ed3db0SBarry Smith   for (i = 0; i < m; i++) {
127266ed3db0SBarry Smith     jrow = ii[i];
127366ed3db0SBarry Smith     n    = ii[i + 1] - jrow;
127466ed3db0SBarry Smith     sum1 = 0.0;
127566ed3db0SBarry Smith     sum2 = 0.0;
127666ed3db0SBarry Smith     sum3 = 0.0;
127766ed3db0SBarry Smith     sum4 = 0.0;
127866ed3db0SBarry Smith     sum5 = 0.0;
127966ed3db0SBarry Smith     sum6 = 0.0;
128066ed3db0SBarry Smith     sum7 = 0.0;
128166ed3db0SBarry Smith     sum8 = 0.0;
128266ed3db0SBarry Smith     for (j = 0; j < n; j++) {
128366ed3db0SBarry Smith       sum1 += v[jrow] * x[8 * idx[jrow]];
128466ed3db0SBarry Smith       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
128566ed3db0SBarry Smith       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
128666ed3db0SBarry Smith       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
128766ed3db0SBarry Smith       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
128866ed3db0SBarry Smith       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
128966ed3db0SBarry Smith       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
129066ed3db0SBarry Smith       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
129166ed3db0SBarry Smith       jrow++;
129266ed3db0SBarry Smith     }
129366ed3db0SBarry Smith     y[8 * i] += sum1;
129466ed3db0SBarry Smith     y[8 * i + 1] += sum2;
129566ed3db0SBarry Smith     y[8 * i + 2] += sum3;
129666ed3db0SBarry Smith     y[8 * i + 3] += sum4;
129766ed3db0SBarry Smith     y[8 * i + 4] += sum5;
129866ed3db0SBarry Smith     y[8 * i + 5] += sum6;
129966ed3db0SBarry Smith     y[8 * i + 6] += sum7;
130066ed3db0SBarry Smith     y[8 * i + 7] += sum8;
130166ed3db0SBarry Smith   }
130266ed3db0SBarry Smith 
13039566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
130666ed3db0SBarry Smith   PetscFunctionReturn(0);
130766ed3db0SBarry Smith }
130866ed3db0SBarry Smith 
13099371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) {
131066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
131166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1312d2840e09SBarry Smith   const PetscScalar *x, *v;
1313d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1314d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1315d2840e09SBarry Smith   PetscInt           n, i;
131666ed3db0SBarry Smith 
131766ed3db0SBarry Smith   PetscFunctionBegin;
13189566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
13199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13209566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
132166ed3db0SBarry Smith   for (i = 0; i < m; i++) {
1322bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1323bfec09a0SHong Zhang     v      = a->a + a->i[i];
132466ed3db0SBarry Smith     n      = a->i[i + 1] - a->i[i];
132566ed3db0SBarry Smith     alpha1 = x[8 * i];
132666ed3db0SBarry Smith     alpha2 = x[8 * i + 1];
132766ed3db0SBarry Smith     alpha3 = x[8 * i + 2];
132866ed3db0SBarry Smith     alpha4 = x[8 * i + 3];
132966ed3db0SBarry Smith     alpha5 = x[8 * i + 4];
133066ed3db0SBarry Smith     alpha6 = x[8 * i + 5];
133166ed3db0SBarry Smith     alpha7 = x[8 * i + 6];
133266ed3db0SBarry Smith     alpha8 = x[8 * i + 7];
133366ed3db0SBarry Smith     while (n-- > 0) {
133466ed3db0SBarry Smith       y[8 * (*idx)] += alpha1 * (*v);
133566ed3db0SBarry Smith       y[8 * (*idx) + 1] += alpha2 * (*v);
133666ed3db0SBarry Smith       y[8 * (*idx) + 2] += alpha3 * (*v);
133766ed3db0SBarry Smith       y[8 * (*idx) + 3] += alpha4 * (*v);
133866ed3db0SBarry Smith       y[8 * (*idx) + 4] += alpha5 * (*v);
133966ed3db0SBarry Smith       y[8 * (*idx) + 5] += alpha6 * (*v);
134066ed3db0SBarry Smith       y[8 * (*idx) + 6] += alpha7 * (*v);
134166ed3db0SBarry Smith       y[8 * (*idx) + 7] += alpha8 * (*v);
13429371c9d4SSatish Balay       idx++;
13439371c9d4SSatish Balay       v++;
134466ed3db0SBarry Smith     }
134566ed3db0SBarry Smith   }
13469566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(16.0 * a->nz));
13479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
13489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
13492f7816d4SBarry Smith   PetscFunctionReturn(0);
13502f7816d4SBarry Smith }
13512f7816d4SBarry Smith 
13522b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13539371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
13542b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
13552b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1356d2840e09SBarry Smith   const PetscScalar *x, *v;
1357d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1358d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1359d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
13602b692628SMatthew Knepley 
13612b692628SMatthew Knepley   PetscFunctionBegin;
13629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
13639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
13642b692628SMatthew Knepley   idx = a->j;
13652b692628SMatthew Knepley   v   = a->a;
13662b692628SMatthew Knepley   ii  = a->i;
13672b692628SMatthew Knepley 
13682b692628SMatthew Knepley   for (i = 0; i < m; i++) {
13692b692628SMatthew Knepley     jrow = ii[i];
13702b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
13712b692628SMatthew Knepley     sum1 = 0.0;
13722b692628SMatthew Knepley     sum2 = 0.0;
13732b692628SMatthew Knepley     sum3 = 0.0;
13742b692628SMatthew Knepley     sum4 = 0.0;
13752b692628SMatthew Knepley     sum5 = 0.0;
13762b692628SMatthew Knepley     sum6 = 0.0;
13772b692628SMatthew Knepley     sum7 = 0.0;
13782b692628SMatthew Knepley     sum8 = 0.0;
13792b692628SMatthew Knepley     sum9 = 0.0;
138026fbe8dcSKarl Rupp 
1381b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
13822b692628SMatthew Knepley     for (j = 0; j < n; j++) {
13832b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
13842b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
13852b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
13862b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
13872b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
13882b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
13892b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
13902b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
13912b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
13922b692628SMatthew Knepley       jrow++;
13932b692628SMatthew Knepley     }
13942b692628SMatthew Knepley     y[9 * i]     = sum1;
13952b692628SMatthew Knepley     y[9 * i + 1] = sum2;
13962b692628SMatthew Knepley     y[9 * i + 2] = sum3;
13972b692628SMatthew Knepley     y[9 * i + 3] = sum4;
13982b692628SMatthew Knepley     y[9 * i + 4] = sum5;
13992b692628SMatthew Knepley     y[9 * i + 5] = sum6;
14002b692628SMatthew Knepley     y[9 * i + 6] = sum7;
14012b692628SMatthew Knepley     y[9 * i + 7] = sum8;
14022b692628SMatthew Knepley     y[9 * i + 8] = sum9;
14032b692628SMatthew Knepley   }
14042b692628SMatthew Knepley 
14059566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
14069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14082b692628SMatthew Knepley   PetscFunctionReturn(0);
14092b692628SMatthew Knepley }
14102b692628SMatthew Knepley 
1411b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1412b9cda22cSBarry Smith 
14139371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) {
14142b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14152b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1416d2840e09SBarry Smith   const PetscScalar *x, *v;
1417d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1418d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1419d2840e09SBarry Smith   PetscInt           n, i;
14202b692628SMatthew Knepley 
14212b692628SMatthew Knepley   PetscFunctionBegin;
14229566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
14239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14249566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
14252b692628SMatthew Knepley 
14262b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14272b692628SMatthew Knepley     idx    = a->j + a->i[i];
14282b692628SMatthew Knepley     v      = a->a + a->i[i];
14292b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
14302b692628SMatthew Knepley     alpha1 = x[9 * i];
14312b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
14322b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
14332b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
14342b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
14352b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
14362b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
14372b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
14382f6bd0c6SMatthew Knepley     alpha9 = x[9 * i + 8];
14392b692628SMatthew Knepley     while (n-- > 0) {
14402b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
14412b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
14422b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
14432b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
14442b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
14452b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
14462b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
14472b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
14482b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
14499371c9d4SSatish Balay       idx++;
14509371c9d4SSatish Balay       v++;
14512b692628SMatthew Knepley     }
14522b692628SMatthew Knepley   }
14539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
14549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
14559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14562b692628SMatthew Knepley   PetscFunctionReturn(0);
14572b692628SMatthew Knepley }
14582b692628SMatthew Knepley 
14599371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
14602b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
14612b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1462d2840e09SBarry Smith   const PetscScalar *x, *v;
1463d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1464d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1465b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
14662b692628SMatthew Knepley 
14672b692628SMatthew Knepley   PetscFunctionBegin;
14689566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
14699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
14709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
14712b692628SMatthew Knepley   idx = a->j;
14722b692628SMatthew Knepley   v   = a->a;
14732b692628SMatthew Knepley   ii  = a->i;
14742b692628SMatthew Knepley 
14752b692628SMatthew Knepley   for (i = 0; i < m; i++) {
14762b692628SMatthew Knepley     jrow = ii[i];
14772b692628SMatthew Knepley     n    = ii[i + 1] - jrow;
14782b692628SMatthew Knepley     sum1 = 0.0;
14792b692628SMatthew Knepley     sum2 = 0.0;
14802b692628SMatthew Knepley     sum3 = 0.0;
14812b692628SMatthew Knepley     sum4 = 0.0;
14822b692628SMatthew Knepley     sum5 = 0.0;
14832b692628SMatthew Knepley     sum6 = 0.0;
14842b692628SMatthew Knepley     sum7 = 0.0;
14852b692628SMatthew Knepley     sum8 = 0.0;
14862b692628SMatthew Knepley     sum9 = 0.0;
14872b692628SMatthew Knepley     for (j = 0; j < n; j++) {
14882b692628SMatthew Knepley       sum1 += v[jrow] * x[9 * idx[jrow]];
14892b692628SMatthew Knepley       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
14902b692628SMatthew Knepley       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
14912b692628SMatthew Knepley       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
14922b692628SMatthew Knepley       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
14932b692628SMatthew Knepley       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
14942b692628SMatthew Knepley       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
14952b692628SMatthew Knepley       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
14962b692628SMatthew Knepley       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
14972b692628SMatthew Knepley       jrow++;
14982b692628SMatthew Knepley     }
14992b692628SMatthew Knepley     y[9 * i] += sum1;
15002b692628SMatthew Knepley     y[9 * i + 1] += sum2;
15012b692628SMatthew Knepley     y[9 * i + 2] += sum3;
15022b692628SMatthew Knepley     y[9 * i + 3] += sum4;
15032b692628SMatthew Knepley     y[9 * i + 4] += sum5;
15042b692628SMatthew Knepley     y[9 * i + 5] += sum6;
15052b692628SMatthew Knepley     y[9 * i + 6] += sum7;
15062b692628SMatthew Knepley     y[9 * i + 7] += sum8;
15072b692628SMatthew Knepley     y[9 * i + 8] += sum9;
15082b692628SMatthew Knepley   }
15092b692628SMatthew Knepley 
15109566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15119566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15132b692628SMatthew Knepley   PetscFunctionReturn(0);
15142b692628SMatthew Knepley }
15152b692628SMatthew Knepley 
15169371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) {
15172b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
15182b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1519d2840e09SBarry Smith   const PetscScalar *x, *v;
1520d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1521d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1522d2840e09SBarry Smith   PetscInt           n, i;
15232b692628SMatthew Knepley 
15242b692628SMatthew Knepley   PetscFunctionBegin;
15259566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
15269566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15279566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
15282b692628SMatthew Knepley   for (i = 0; i < m; i++) {
15292b692628SMatthew Knepley     idx    = a->j + a->i[i];
15302b692628SMatthew Knepley     v      = a->a + a->i[i];
15312b692628SMatthew Knepley     n      = a->i[i + 1] - a->i[i];
15322b692628SMatthew Knepley     alpha1 = x[9 * i];
15332b692628SMatthew Knepley     alpha2 = x[9 * i + 1];
15342b692628SMatthew Knepley     alpha3 = x[9 * i + 2];
15352b692628SMatthew Knepley     alpha4 = x[9 * i + 3];
15362b692628SMatthew Knepley     alpha5 = x[9 * i + 4];
15372b692628SMatthew Knepley     alpha6 = x[9 * i + 5];
15382b692628SMatthew Knepley     alpha7 = x[9 * i + 6];
15392b692628SMatthew Knepley     alpha8 = x[9 * i + 7];
15402b692628SMatthew Knepley     alpha9 = x[9 * i + 8];
15412b692628SMatthew Knepley     while (n-- > 0) {
15422b692628SMatthew Knepley       y[9 * (*idx)] += alpha1 * (*v);
15432b692628SMatthew Knepley       y[9 * (*idx) + 1] += alpha2 * (*v);
15442b692628SMatthew Knepley       y[9 * (*idx) + 2] += alpha3 * (*v);
15452b692628SMatthew Knepley       y[9 * (*idx) + 3] += alpha4 * (*v);
15462b692628SMatthew Knepley       y[9 * (*idx) + 4] += alpha5 * (*v);
15472b692628SMatthew Knepley       y[9 * (*idx) + 5] += alpha6 * (*v);
15482b692628SMatthew Knepley       y[9 * (*idx) + 6] += alpha7 * (*v);
15492b692628SMatthew Knepley       y[9 * (*idx) + 7] += alpha8 * (*v);
15502b692628SMatthew Knepley       y[9 * (*idx) + 8] += alpha9 * (*v);
15519371c9d4SSatish Balay       idx++;
15529371c9d4SSatish Balay       v++;
15532b692628SMatthew Knepley     }
15542b692628SMatthew Knepley   }
15559566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(18.0 * a->nz));
15569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
15579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
15582b692628SMatthew Knepley   PetscFunctionReturn(0);
15592b692628SMatthew Knepley }
15609371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1561b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1562b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1563d2840e09SBarry Smith   const PetscScalar *x, *v;
1564d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1565d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1566d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1567b9cda22cSBarry Smith 
1568b9cda22cSBarry Smith   PetscFunctionBegin;
15699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
15709566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1571b9cda22cSBarry Smith   idx = a->j;
1572b9cda22cSBarry Smith   v   = a->a;
1573b9cda22cSBarry Smith   ii  = a->i;
1574b9cda22cSBarry Smith 
1575b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1576b9cda22cSBarry Smith     jrow  = ii[i];
1577b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1578b9cda22cSBarry Smith     sum1  = 0.0;
1579b9cda22cSBarry Smith     sum2  = 0.0;
1580b9cda22cSBarry Smith     sum3  = 0.0;
1581b9cda22cSBarry Smith     sum4  = 0.0;
1582b9cda22cSBarry Smith     sum5  = 0.0;
1583b9cda22cSBarry Smith     sum6  = 0.0;
1584b9cda22cSBarry Smith     sum7  = 0.0;
1585b9cda22cSBarry Smith     sum8  = 0.0;
1586b9cda22cSBarry Smith     sum9  = 0.0;
1587b9cda22cSBarry Smith     sum10 = 0.0;
158826fbe8dcSKarl Rupp 
1589b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
1590b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1591b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1592b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1593b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1594b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1595b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1596b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1597b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1598b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1599b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1600b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1601b9cda22cSBarry Smith       jrow++;
1602b9cda22cSBarry Smith     }
1603b9cda22cSBarry Smith     y[10 * i]     = sum1;
1604b9cda22cSBarry Smith     y[10 * i + 1] = sum2;
1605b9cda22cSBarry Smith     y[10 * i + 2] = sum3;
1606b9cda22cSBarry Smith     y[10 * i + 3] = sum4;
1607b9cda22cSBarry Smith     y[10 * i + 4] = sum5;
1608b9cda22cSBarry Smith     y[10 * i + 5] = sum6;
1609b9cda22cSBarry Smith     y[10 * i + 6] = sum7;
1610b9cda22cSBarry Smith     y[10 * i + 7] = sum8;
1611b9cda22cSBarry Smith     y[10 * i + 8] = sum9;
1612b9cda22cSBarry Smith     y[10 * i + 9] = sum10;
1613b9cda22cSBarry Smith   }
1614b9cda22cSBarry Smith 
16159566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
16169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1618b9cda22cSBarry Smith   PetscFunctionReturn(0);
1619b9cda22cSBarry Smith }
1620b9cda22cSBarry Smith 
16219371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1622b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1623b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1624d2840e09SBarry Smith   const PetscScalar *x, *v;
1625d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1627b9cda22cSBarry Smith   PetscInt           n, i, jrow, j;
1628b9cda22cSBarry Smith 
1629b9cda22cSBarry Smith   PetscFunctionBegin;
16309566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
16319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16329566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1633b9cda22cSBarry Smith   idx = a->j;
1634b9cda22cSBarry Smith   v   = a->a;
1635b9cda22cSBarry Smith   ii  = a->i;
1636b9cda22cSBarry Smith 
1637b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1638b9cda22cSBarry Smith     jrow  = ii[i];
1639b9cda22cSBarry Smith     n     = ii[i + 1] - jrow;
1640b9cda22cSBarry Smith     sum1  = 0.0;
1641b9cda22cSBarry Smith     sum2  = 0.0;
1642b9cda22cSBarry Smith     sum3  = 0.0;
1643b9cda22cSBarry Smith     sum4  = 0.0;
1644b9cda22cSBarry Smith     sum5  = 0.0;
1645b9cda22cSBarry Smith     sum6  = 0.0;
1646b9cda22cSBarry Smith     sum7  = 0.0;
1647b9cda22cSBarry Smith     sum8  = 0.0;
1648b9cda22cSBarry Smith     sum9  = 0.0;
1649b9cda22cSBarry Smith     sum10 = 0.0;
1650b9cda22cSBarry Smith     for (j = 0; j < n; j++) {
1651b9cda22cSBarry Smith       sum1 += v[jrow] * x[10 * idx[jrow]];
1652b9cda22cSBarry Smith       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1653b9cda22cSBarry Smith       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1654b9cda22cSBarry Smith       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1655b9cda22cSBarry Smith       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1656b9cda22cSBarry Smith       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1657b9cda22cSBarry Smith       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1658b9cda22cSBarry Smith       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1659b9cda22cSBarry Smith       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1660b9cda22cSBarry Smith       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1661b9cda22cSBarry Smith       jrow++;
1662b9cda22cSBarry Smith     }
1663b9cda22cSBarry Smith     y[10 * i] += sum1;
1664b9cda22cSBarry Smith     y[10 * i + 1] += sum2;
1665b9cda22cSBarry Smith     y[10 * i + 2] += sum3;
1666b9cda22cSBarry Smith     y[10 * i + 3] += sum4;
1667b9cda22cSBarry Smith     y[10 * i + 4] += sum5;
1668b9cda22cSBarry Smith     y[10 * i + 5] += sum6;
1669b9cda22cSBarry Smith     y[10 * i + 6] += sum7;
1670b9cda22cSBarry Smith     y[10 * i + 7] += sum8;
1671b9cda22cSBarry Smith     y[10 * i + 8] += sum9;
1672b9cda22cSBarry Smith     y[10 * i + 9] += sum10;
1673b9cda22cSBarry Smith   }
1674b9cda22cSBarry Smith 
16759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
16769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
16779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1678b9cda22cSBarry Smith   PetscFunctionReturn(0);
1679b9cda22cSBarry Smith }
1680b9cda22cSBarry Smith 
16819371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) {
1682b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1683b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1684d2840e09SBarry Smith   const PetscScalar *x, *v;
1685d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1686d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1687d2840e09SBarry Smith   PetscInt           n, i;
1688b9cda22cSBarry Smith 
1689b9cda22cSBarry Smith   PetscFunctionBegin;
16909566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
16919566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
16929566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1693b9cda22cSBarry Smith 
1694b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1695b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1696b9cda22cSBarry Smith     v       = a->a + a->i[i];
1697b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1698e29fdc22SBarry Smith     alpha1  = x[10 * i];
1699e29fdc22SBarry Smith     alpha2  = x[10 * i + 1];
1700e29fdc22SBarry Smith     alpha3  = x[10 * i + 2];
1701e29fdc22SBarry Smith     alpha4  = x[10 * i + 3];
1702e29fdc22SBarry Smith     alpha5  = x[10 * i + 4];
1703e29fdc22SBarry Smith     alpha6  = x[10 * i + 5];
1704e29fdc22SBarry Smith     alpha7  = x[10 * i + 6];
1705e29fdc22SBarry Smith     alpha8  = x[10 * i + 7];
1706e29fdc22SBarry Smith     alpha9  = x[10 * i + 8];
1707b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1708b9cda22cSBarry Smith     while (n-- > 0) {
1709e29fdc22SBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1710e29fdc22SBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1711e29fdc22SBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1712e29fdc22SBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1713e29fdc22SBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1714e29fdc22SBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1715e29fdc22SBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1716e29fdc22SBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1717e29fdc22SBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1718b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17199371c9d4SSatish Balay       idx++;
17209371c9d4SSatish Balay       v++;
1721b9cda22cSBarry Smith     }
1722b9cda22cSBarry Smith   }
17239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1726b9cda22cSBarry Smith   PetscFunctionReturn(0);
1727b9cda22cSBarry Smith }
1728b9cda22cSBarry Smith 
17299371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) {
1730b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1731b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1732d2840e09SBarry Smith   const PetscScalar *x, *v;
1733d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1734d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1735d2840e09SBarry Smith   PetscInt           n, i;
1736b9cda22cSBarry Smith 
1737b9cda22cSBarry Smith   PetscFunctionBegin;
17389566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
17399566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17409566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1741b9cda22cSBarry Smith   for (i = 0; i < m; i++) {
1742b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1743b9cda22cSBarry Smith     v       = a->a + a->i[i];
1744b9cda22cSBarry Smith     n       = a->i[i + 1] - a->i[i];
1745b9cda22cSBarry Smith     alpha1  = x[10 * i];
1746b9cda22cSBarry Smith     alpha2  = x[10 * i + 1];
1747b9cda22cSBarry Smith     alpha3  = x[10 * i + 2];
1748b9cda22cSBarry Smith     alpha4  = x[10 * i + 3];
1749b9cda22cSBarry Smith     alpha5  = x[10 * i + 4];
1750b9cda22cSBarry Smith     alpha6  = x[10 * i + 5];
1751b9cda22cSBarry Smith     alpha7  = x[10 * i + 6];
1752b9cda22cSBarry Smith     alpha8  = x[10 * i + 7];
1753b9cda22cSBarry Smith     alpha9  = x[10 * i + 8];
1754b9cda22cSBarry Smith     alpha10 = x[10 * i + 9];
1755b9cda22cSBarry Smith     while (n-- > 0) {
1756b9cda22cSBarry Smith       y[10 * (*idx)] += alpha1 * (*v);
1757b9cda22cSBarry Smith       y[10 * (*idx) + 1] += alpha2 * (*v);
1758b9cda22cSBarry Smith       y[10 * (*idx) + 2] += alpha3 * (*v);
1759b9cda22cSBarry Smith       y[10 * (*idx) + 3] += alpha4 * (*v);
1760b9cda22cSBarry Smith       y[10 * (*idx) + 4] += alpha5 * (*v);
1761b9cda22cSBarry Smith       y[10 * (*idx) + 5] += alpha6 * (*v);
1762b9cda22cSBarry Smith       y[10 * (*idx) + 6] += alpha7 * (*v);
1763b9cda22cSBarry Smith       y[10 * (*idx) + 7] += alpha8 * (*v);
1764b9cda22cSBarry Smith       y[10 * (*idx) + 8] += alpha9 * (*v);
1765b9cda22cSBarry Smith       y[10 * (*idx) + 9] += alpha10 * (*v);
17669371c9d4SSatish Balay       idx++;
17679371c9d4SSatish Balay       v++;
1768b9cda22cSBarry Smith     }
1769b9cda22cSBarry Smith   }
17709566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(20.0 * a->nz));
17719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
17729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
1773b9cda22cSBarry Smith   PetscFunctionReturn(0);
1774b9cda22cSBarry Smith }
1775b9cda22cSBarry Smith 
17762f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
17779371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1778dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1779dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1780d2840e09SBarry Smith   const PetscScalar *x, *v;
1781d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1782d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1783d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
1784dbdd7285SBarry Smith 
1785dbdd7285SBarry Smith   PetscFunctionBegin;
17869566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
17879566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1788dbdd7285SBarry Smith   idx = a->j;
1789dbdd7285SBarry Smith   v   = a->a;
1790dbdd7285SBarry Smith   ii  = a->i;
1791dbdd7285SBarry Smith 
1792dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1793dbdd7285SBarry Smith     jrow  = ii[i];
1794dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1795dbdd7285SBarry Smith     sum1  = 0.0;
1796dbdd7285SBarry Smith     sum2  = 0.0;
1797dbdd7285SBarry Smith     sum3  = 0.0;
1798dbdd7285SBarry Smith     sum4  = 0.0;
1799dbdd7285SBarry Smith     sum5  = 0.0;
1800dbdd7285SBarry Smith     sum6  = 0.0;
1801dbdd7285SBarry Smith     sum7  = 0.0;
1802dbdd7285SBarry Smith     sum8  = 0.0;
1803dbdd7285SBarry Smith     sum9  = 0.0;
1804dbdd7285SBarry Smith     sum10 = 0.0;
1805dbdd7285SBarry Smith     sum11 = 0.0;
180626fbe8dcSKarl Rupp 
1807dbdd7285SBarry Smith     nonzerorow += (n > 0);
1808dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1809dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1810dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1811dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1812dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1813dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1814dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1815dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1816dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1817dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1818dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1819dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1820dbdd7285SBarry Smith       jrow++;
1821dbdd7285SBarry Smith     }
1822dbdd7285SBarry Smith     y[11 * i]      = sum1;
1823dbdd7285SBarry Smith     y[11 * i + 1]  = sum2;
1824dbdd7285SBarry Smith     y[11 * i + 2]  = sum3;
1825dbdd7285SBarry Smith     y[11 * i + 3]  = sum4;
1826dbdd7285SBarry Smith     y[11 * i + 4]  = sum5;
1827dbdd7285SBarry Smith     y[11 * i + 5]  = sum6;
1828dbdd7285SBarry Smith     y[11 * i + 6]  = sum7;
1829dbdd7285SBarry Smith     y[11 * i + 7]  = sum8;
1830dbdd7285SBarry Smith     y[11 * i + 8]  = sum9;
1831dbdd7285SBarry Smith     y[11 * i + 9]  = sum10;
1832dbdd7285SBarry Smith     y[11 * i + 10] = sum11;
1833dbdd7285SBarry Smith   }
1834dbdd7285SBarry Smith 
18359566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
18369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
18379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1838dbdd7285SBarry Smith   PetscFunctionReturn(0);
1839dbdd7285SBarry Smith }
1840dbdd7285SBarry Smith 
18419371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1842dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1843dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1844d2840e09SBarry Smith   const PetscScalar *x, *v;
1845d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1846d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1847dbdd7285SBarry Smith   PetscInt           n, i, jrow, j;
1848dbdd7285SBarry Smith 
1849dbdd7285SBarry Smith   PetscFunctionBegin;
18509566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
18519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
18529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1853dbdd7285SBarry Smith   idx = a->j;
1854dbdd7285SBarry Smith   v   = a->a;
1855dbdd7285SBarry Smith   ii  = a->i;
1856dbdd7285SBarry Smith 
1857dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1858dbdd7285SBarry Smith     jrow  = ii[i];
1859dbdd7285SBarry Smith     n     = ii[i + 1] - jrow;
1860dbdd7285SBarry Smith     sum1  = 0.0;
1861dbdd7285SBarry Smith     sum2  = 0.0;
1862dbdd7285SBarry Smith     sum3  = 0.0;
1863dbdd7285SBarry Smith     sum4  = 0.0;
1864dbdd7285SBarry Smith     sum5  = 0.0;
1865dbdd7285SBarry Smith     sum6  = 0.0;
1866dbdd7285SBarry Smith     sum7  = 0.0;
1867dbdd7285SBarry Smith     sum8  = 0.0;
1868dbdd7285SBarry Smith     sum9  = 0.0;
1869dbdd7285SBarry Smith     sum10 = 0.0;
1870dbdd7285SBarry Smith     sum11 = 0.0;
1871dbdd7285SBarry Smith     for (j = 0; j < n; j++) {
1872dbdd7285SBarry Smith       sum1 += v[jrow] * x[11 * idx[jrow]];
1873dbdd7285SBarry Smith       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1874dbdd7285SBarry Smith       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1875dbdd7285SBarry Smith       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1876dbdd7285SBarry Smith       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1877dbdd7285SBarry Smith       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1878dbdd7285SBarry Smith       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1879dbdd7285SBarry Smith       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1880dbdd7285SBarry Smith       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1881dbdd7285SBarry Smith       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1882dbdd7285SBarry Smith       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1883dbdd7285SBarry Smith       jrow++;
1884dbdd7285SBarry Smith     }
1885dbdd7285SBarry Smith     y[11 * i] += sum1;
1886dbdd7285SBarry Smith     y[11 * i + 1] += sum2;
1887dbdd7285SBarry Smith     y[11 * i + 2] += sum3;
1888dbdd7285SBarry Smith     y[11 * i + 3] += sum4;
1889dbdd7285SBarry Smith     y[11 * i + 4] += sum5;
1890dbdd7285SBarry Smith     y[11 * i + 5] += sum6;
1891dbdd7285SBarry Smith     y[11 * i + 6] += sum7;
1892dbdd7285SBarry Smith     y[11 * i + 7] += sum8;
1893dbdd7285SBarry Smith     y[11 * i + 8] += sum9;
1894dbdd7285SBarry Smith     y[11 * i + 9] += sum10;
1895dbdd7285SBarry Smith     y[11 * i + 10] += sum11;
1896dbdd7285SBarry Smith   }
1897dbdd7285SBarry Smith 
18989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
18999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1901dbdd7285SBarry Smith   PetscFunctionReturn(0);
1902dbdd7285SBarry Smith }
1903dbdd7285SBarry Smith 
19049371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) {
1905dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1906dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1907d2840e09SBarry Smith   const PetscScalar *x, *v;
1908d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1909d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1910d2840e09SBarry Smith   PetscInt           n, i;
1911dbdd7285SBarry Smith 
1912dbdd7285SBarry Smith   PetscFunctionBegin;
19139566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
19149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
1916dbdd7285SBarry Smith 
1917dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1918dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1919dbdd7285SBarry Smith     v       = a->a + a->i[i];
1920dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1921dbdd7285SBarry Smith     alpha1  = x[11 * i];
1922dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1923dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1924dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1925dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1926dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1927dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1928dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1929dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1930dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1931dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1932dbdd7285SBarry Smith     while (n-- > 0) {
1933dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1934dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1935dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1936dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1937dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1938dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1939dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1940dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1941dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1942dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1943dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19449371c9d4SSatish Balay       idx++;
19459371c9d4SSatish Balay       v++;
1946dbdd7285SBarry Smith     }
1947dbdd7285SBarry Smith   }
19489566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19499566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1951dbdd7285SBarry Smith   PetscFunctionReturn(0);
1952dbdd7285SBarry Smith }
1953dbdd7285SBarry Smith 
19549371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) {
1955dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1956dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1957d2840e09SBarry Smith   const PetscScalar *x, *v;
1958d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1959d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
1960d2840e09SBarry Smith   PetscInt           n, i;
1961dbdd7285SBarry Smith 
1962dbdd7285SBarry Smith   PetscFunctionBegin;
19639566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
19649566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
19659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
1966dbdd7285SBarry Smith   for (i = 0; i < m; i++) {
1967dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1968dbdd7285SBarry Smith     v       = a->a + a->i[i];
1969dbdd7285SBarry Smith     n       = a->i[i + 1] - a->i[i];
1970dbdd7285SBarry Smith     alpha1  = x[11 * i];
1971dbdd7285SBarry Smith     alpha2  = x[11 * i + 1];
1972dbdd7285SBarry Smith     alpha3  = x[11 * i + 2];
1973dbdd7285SBarry Smith     alpha4  = x[11 * i + 3];
1974dbdd7285SBarry Smith     alpha5  = x[11 * i + 4];
1975dbdd7285SBarry Smith     alpha6  = x[11 * i + 5];
1976dbdd7285SBarry Smith     alpha7  = x[11 * i + 6];
1977dbdd7285SBarry Smith     alpha8  = x[11 * i + 7];
1978dbdd7285SBarry Smith     alpha9  = x[11 * i + 8];
1979dbdd7285SBarry Smith     alpha10 = x[11 * i + 9];
1980dbdd7285SBarry Smith     alpha11 = x[11 * i + 10];
1981dbdd7285SBarry Smith     while (n-- > 0) {
1982dbdd7285SBarry Smith       y[11 * (*idx)] += alpha1 * (*v);
1983dbdd7285SBarry Smith       y[11 * (*idx) + 1] += alpha2 * (*v);
1984dbdd7285SBarry Smith       y[11 * (*idx) + 2] += alpha3 * (*v);
1985dbdd7285SBarry Smith       y[11 * (*idx) + 3] += alpha4 * (*v);
1986dbdd7285SBarry Smith       y[11 * (*idx) + 4] += alpha5 * (*v);
1987dbdd7285SBarry Smith       y[11 * (*idx) + 5] += alpha6 * (*v);
1988dbdd7285SBarry Smith       y[11 * (*idx) + 6] += alpha7 * (*v);
1989dbdd7285SBarry Smith       y[11 * (*idx) + 7] += alpha8 * (*v);
1990dbdd7285SBarry Smith       y[11 * (*idx) + 8] += alpha9 * (*v);
1991dbdd7285SBarry Smith       y[11 * (*idx) + 9] += alpha10 * (*v);
1992dbdd7285SBarry Smith       y[11 * (*idx) + 10] += alpha11 * (*v);
19939371c9d4SSatish Balay       idx++;
19949371c9d4SSatish Balay       v++;
1995dbdd7285SBarry Smith     }
1996dbdd7285SBarry Smith   }
19979566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(22.0 * a->nz));
19989566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
19999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2000dbdd7285SBarry Smith   PetscFunctionReturn(0);
2001dbdd7285SBarry Smith }
2002dbdd7285SBarry Smith 
2003dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
20049371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
20052f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
20062f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2007d2840e09SBarry Smith   const PetscScalar *x, *v;
2008d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20092f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2010d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2011d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
20122f7816d4SBarry Smith 
20132f7816d4SBarry Smith   PetscFunctionBegin;
20149566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
20159566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
20162f7816d4SBarry Smith   idx = a->j;
20172f7816d4SBarry Smith   v   = a->a;
20182f7816d4SBarry Smith   ii  = a->i;
20192f7816d4SBarry Smith 
20202f7816d4SBarry Smith   for (i = 0; i < m; i++) {
20212f7816d4SBarry Smith     jrow  = ii[i];
20222f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
20232f7816d4SBarry Smith     sum1  = 0.0;
20242f7816d4SBarry Smith     sum2  = 0.0;
20252f7816d4SBarry Smith     sum3  = 0.0;
20262f7816d4SBarry Smith     sum4  = 0.0;
20272f7816d4SBarry Smith     sum5  = 0.0;
20282f7816d4SBarry Smith     sum6  = 0.0;
20292f7816d4SBarry Smith     sum7  = 0.0;
20302f7816d4SBarry Smith     sum8  = 0.0;
20312f7816d4SBarry Smith     sum9  = 0.0;
20322f7816d4SBarry Smith     sum10 = 0.0;
20332f7816d4SBarry Smith     sum11 = 0.0;
20342f7816d4SBarry Smith     sum12 = 0.0;
20352f7816d4SBarry Smith     sum13 = 0.0;
20362f7816d4SBarry Smith     sum14 = 0.0;
20372f7816d4SBarry Smith     sum15 = 0.0;
20382f7816d4SBarry Smith     sum16 = 0.0;
203926fbe8dcSKarl Rupp 
2040b3c51c66SMatthew Knepley     nonzerorow += (n > 0);
20412f7816d4SBarry Smith     for (j = 0; j < n; j++) {
20422f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
20432f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
20442f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
20452f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
20462f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
20472f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
20482f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
20492f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
20502f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
20512f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
20522f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
20532f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
20542f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
20552f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
20562f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
20572f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
20582f7816d4SBarry Smith       jrow++;
20592f7816d4SBarry Smith     }
20602f7816d4SBarry Smith     y[16 * i]      = sum1;
20612f7816d4SBarry Smith     y[16 * i + 1]  = sum2;
20622f7816d4SBarry Smith     y[16 * i + 2]  = sum3;
20632f7816d4SBarry Smith     y[16 * i + 3]  = sum4;
20642f7816d4SBarry Smith     y[16 * i + 4]  = sum5;
20652f7816d4SBarry Smith     y[16 * i + 5]  = sum6;
20662f7816d4SBarry Smith     y[16 * i + 6]  = sum7;
20672f7816d4SBarry Smith     y[16 * i + 7]  = sum8;
20682f7816d4SBarry Smith     y[16 * i + 8]  = sum9;
20692f7816d4SBarry Smith     y[16 * i + 9]  = sum10;
20702f7816d4SBarry Smith     y[16 * i + 10] = sum11;
20712f7816d4SBarry Smith     y[16 * i + 11] = sum12;
20722f7816d4SBarry Smith     y[16 * i + 12] = sum13;
20732f7816d4SBarry Smith     y[16 * i + 13] = sum14;
20742f7816d4SBarry Smith     y[16 * i + 14] = sum15;
20752f7816d4SBarry Smith     y[16 * i + 15] = sum16;
20762f7816d4SBarry Smith   }
20772f7816d4SBarry Smith 
20789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
20799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
20809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
20812f7816d4SBarry Smith   PetscFunctionReturn(0);
20822f7816d4SBarry Smith }
20832f7816d4SBarry Smith 
20849371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) {
20852f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
20862f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2087d2840e09SBarry Smith   const PetscScalar *x, *v;
2088d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
20892f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2090d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2091d2840e09SBarry Smith   PetscInt           n, i;
20922f7816d4SBarry Smith 
20932f7816d4SBarry Smith   PetscFunctionBegin;
20949566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
20959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
20969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2097bfec09a0SHong Zhang 
20982f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2099bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2100bfec09a0SHong Zhang     v       = a->a + a->i[i];
21012f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
21022f7816d4SBarry Smith     alpha1  = x[16 * i];
21032f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
21042f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
21052f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
21062f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
21072f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
21082f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
21092f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
21102f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
21112f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
21122f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
21132f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
21142f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
21152f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
21162f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
21172f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
21182f7816d4SBarry Smith     while (n-- > 0) {
21192f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
21202f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
21212f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
21222f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
21232f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
21242f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
21252f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
21262f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
21272f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
21282f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
21292f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
21302f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
21312f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
21322f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
21332f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
21342f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
21359371c9d4SSatish Balay       idx++;
21369371c9d4SSatish Balay       v++;
21372f7816d4SBarry Smith     }
21382f7816d4SBarry Smith   }
21399566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
21409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
21419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
21422f7816d4SBarry Smith   PetscFunctionReturn(0);
21432f7816d4SBarry Smith }
21442f7816d4SBarry Smith 
21459371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
21462f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
21472f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2148d2840e09SBarry Smith   const PetscScalar *x, *v;
2149d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21502f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2151d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2152b24ad042SBarry Smith   PetscInt           n, i, jrow, j;
21532f7816d4SBarry Smith 
21542f7816d4SBarry Smith   PetscFunctionBegin;
21559566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
21569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
21579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
21582f7816d4SBarry Smith   idx = a->j;
21592f7816d4SBarry Smith   v   = a->a;
21602f7816d4SBarry Smith   ii  = a->i;
21612f7816d4SBarry Smith 
21622f7816d4SBarry Smith   for (i = 0; i < m; i++) {
21632f7816d4SBarry Smith     jrow  = ii[i];
21642f7816d4SBarry Smith     n     = ii[i + 1] - jrow;
21652f7816d4SBarry Smith     sum1  = 0.0;
21662f7816d4SBarry Smith     sum2  = 0.0;
21672f7816d4SBarry Smith     sum3  = 0.0;
21682f7816d4SBarry Smith     sum4  = 0.0;
21692f7816d4SBarry Smith     sum5  = 0.0;
21702f7816d4SBarry Smith     sum6  = 0.0;
21712f7816d4SBarry Smith     sum7  = 0.0;
21722f7816d4SBarry Smith     sum8  = 0.0;
21732f7816d4SBarry Smith     sum9  = 0.0;
21742f7816d4SBarry Smith     sum10 = 0.0;
21752f7816d4SBarry Smith     sum11 = 0.0;
21762f7816d4SBarry Smith     sum12 = 0.0;
21772f7816d4SBarry Smith     sum13 = 0.0;
21782f7816d4SBarry Smith     sum14 = 0.0;
21792f7816d4SBarry Smith     sum15 = 0.0;
21802f7816d4SBarry Smith     sum16 = 0.0;
21812f7816d4SBarry Smith     for (j = 0; j < n; j++) {
21822f7816d4SBarry Smith       sum1 += v[jrow] * x[16 * idx[jrow]];
21832f7816d4SBarry Smith       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
21842f7816d4SBarry Smith       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
21852f7816d4SBarry Smith       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
21862f7816d4SBarry Smith       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
21872f7816d4SBarry Smith       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
21882f7816d4SBarry Smith       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
21892f7816d4SBarry Smith       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
21902f7816d4SBarry Smith       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
21912f7816d4SBarry Smith       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
21922f7816d4SBarry Smith       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
21932f7816d4SBarry Smith       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
21942f7816d4SBarry Smith       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
21952f7816d4SBarry Smith       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
21962f7816d4SBarry Smith       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
21972f7816d4SBarry Smith       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
21982f7816d4SBarry Smith       jrow++;
21992f7816d4SBarry Smith     }
22002f7816d4SBarry Smith     y[16 * i] += sum1;
22012f7816d4SBarry Smith     y[16 * i + 1] += sum2;
22022f7816d4SBarry Smith     y[16 * i + 2] += sum3;
22032f7816d4SBarry Smith     y[16 * i + 3] += sum4;
22042f7816d4SBarry Smith     y[16 * i + 4] += sum5;
22052f7816d4SBarry Smith     y[16 * i + 5] += sum6;
22062f7816d4SBarry Smith     y[16 * i + 6] += sum7;
22072f7816d4SBarry Smith     y[16 * i + 7] += sum8;
22082f7816d4SBarry Smith     y[16 * i + 8] += sum9;
22092f7816d4SBarry Smith     y[16 * i + 9] += sum10;
22102f7816d4SBarry Smith     y[16 * i + 10] += sum11;
22112f7816d4SBarry Smith     y[16 * i + 11] += sum12;
22122f7816d4SBarry Smith     y[16 * i + 12] += sum13;
22132f7816d4SBarry Smith     y[16 * i + 13] += sum14;
22142f7816d4SBarry Smith     y[16 * i + 14] += sum15;
22152f7816d4SBarry Smith     y[16 * i + 15] += sum16;
22162f7816d4SBarry Smith   }
22172f7816d4SBarry Smith 
22189566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
22212f7816d4SBarry Smith   PetscFunctionReturn(0);
22222f7816d4SBarry Smith }
22232f7816d4SBarry Smith 
22249371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) {
22252f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
22262f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2227d2840e09SBarry Smith   const PetscScalar *x, *v;
2228d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
22292f7816d4SBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2230d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2231d2840e09SBarry Smith   PetscInt           n, i;
22322f7816d4SBarry Smith 
22332f7816d4SBarry Smith   PetscFunctionBegin;
22349566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
22359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
22372f7816d4SBarry Smith   for (i = 0; i < m; i++) {
2238bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2239bfec09a0SHong Zhang     v       = a->a + a->i[i];
22402f7816d4SBarry Smith     n       = a->i[i + 1] - a->i[i];
22412f7816d4SBarry Smith     alpha1  = x[16 * i];
22422f7816d4SBarry Smith     alpha2  = x[16 * i + 1];
22432f7816d4SBarry Smith     alpha3  = x[16 * i + 2];
22442f7816d4SBarry Smith     alpha4  = x[16 * i + 3];
22452f7816d4SBarry Smith     alpha5  = x[16 * i + 4];
22462f7816d4SBarry Smith     alpha6  = x[16 * i + 5];
22472f7816d4SBarry Smith     alpha7  = x[16 * i + 6];
22482f7816d4SBarry Smith     alpha8  = x[16 * i + 7];
22492f7816d4SBarry Smith     alpha9  = x[16 * i + 8];
22502f7816d4SBarry Smith     alpha10 = x[16 * i + 9];
22512f7816d4SBarry Smith     alpha11 = x[16 * i + 10];
22522f7816d4SBarry Smith     alpha12 = x[16 * i + 11];
22532f7816d4SBarry Smith     alpha13 = x[16 * i + 12];
22542f7816d4SBarry Smith     alpha14 = x[16 * i + 13];
22552f7816d4SBarry Smith     alpha15 = x[16 * i + 14];
22562f7816d4SBarry Smith     alpha16 = x[16 * i + 15];
22572f7816d4SBarry Smith     while (n-- > 0) {
22582f7816d4SBarry Smith       y[16 * (*idx)] += alpha1 * (*v);
22592f7816d4SBarry Smith       y[16 * (*idx) + 1] += alpha2 * (*v);
22602f7816d4SBarry Smith       y[16 * (*idx) + 2] += alpha3 * (*v);
22612f7816d4SBarry Smith       y[16 * (*idx) + 3] += alpha4 * (*v);
22622f7816d4SBarry Smith       y[16 * (*idx) + 4] += alpha5 * (*v);
22632f7816d4SBarry Smith       y[16 * (*idx) + 5] += alpha6 * (*v);
22642f7816d4SBarry Smith       y[16 * (*idx) + 6] += alpha7 * (*v);
22652f7816d4SBarry Smith       y[16 * (*idx) + 7] += alpha8 * (*v);
22662f7816d4SBarry Smith       y[16 * (*idx) + 8] += alpha9 * (*v);
22672f7816d4SBarry Smith       y[16 * (*idx) + 9] += alpha10 * (*v);
22682f7816d4SBarry Smith       y[16 * (*idx) + 10] += alpha11 * (*v);
22692f7816d4SBarry Smith       y[16 * (*idx) + 11] += alpha12 * (*v);
22702f7816d4SBarry Smith       y[16 * (*idx) + 12] += alpha13 * (*v);
22712f7816d4SBarry Smith       y[16 * (*idx) + 13] += alpha14 * (*v);
22722f7816d4SBarry Smith       y[16 * (*idx) + 14] += alpha15 * (*v);
22732f7816d4SBarry Smith       y[16 * (*idx) + 15] += alpha16 * (*v);
22749371c9d4SSatish Balay       idx++;
22759371c9d4SSatish Balay       v++;
22762f7816d4SBarry Smith     }
22772f7816d4SBarry Smith   }
22789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(32.0 * a->nz));
22799566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
22809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
228166ed3db0SBarry Smith   PetscFunctionReturn(0);
228266ed3db0SBarry Smith }
228366ed3db0SBarry Smith 
2284ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
22859371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2286ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2287ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2288d2840e09SBarry Smith   const PetscScalar *x, *v;
2289d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2290ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2291d2840e09SBarry Smith   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2292d2840e09SBarry Smith   PetscInt           nonzerorow = 0, n, i, jrow, j;
2293ed1c418cSBarry Smith 
2294ed1c418cSBarry Smith   PetscFunctionBegin;
22959566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
22969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2297ed1c418cSBarry Smith   idx = a->j;
2298ed1c418cSBarry Smith   v   = a->a;
2299ed1c418cSBarry Smith   ii  = a->i;
2300ed1c418cSBarry Smith 
2301ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2302ed1c418cSBarry Smith     jrow  = ii[i];
2303ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2304ed1c418cSBarry Smith     sum1  = 0.0;
2305ed1c418cSBarry Smith     sum2  = 0.0;
2306ed1c418cSBarry Smith     sum3  = 0.0;
2307ed1c418cSBarry Smith     sum4  = 0.0;
2308ed1c418cSBarry Smith     sum5  = 0.0;
2309ed1c418cSBarry Smith     sum6  = 0.0;
2310ed1c418cSBarry Smith     sum7  = 0.0;
2311ed1c418cSBarry Smith     sum8  = 0.0;
2312ed1c418cSBarry Smith     sum9  = 0.0;
2313ed1c418cSBarry Smith     sum10 = 0.0;
2314ed1c418cSBarry Smith     sum11 = 0.0;
2315ed1c418cSBarry Smith     sum12 = 0.0;
2316ed1c418cSBarry Smith     sum13 = 0.0;
2317ed1c418cSBarry Smith     sum14 = 0.0;
2318ed1c418cSBarry Smith     sum15 = 0.0;
2319ed1c418cSBarry Smith     sum16 = 0.0;
2320ed1c418cSBarry Smith     sum17 = 0.0;
2321ed1c418cSBarry Smith     sum18 = 0.0;
232226fbe8dcSKarl Rupp 
2323ed1c418cSBarry Smith     nonzerorow += (n > 0);
2324ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2325ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2326ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2327ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2328ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2329ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2330ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2331ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2332ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2333ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2334ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2335ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2336ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2337ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2338ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2339ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2340ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2341ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2342ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2343ed1c418cSBarry Smith       jrow++;
2344ed1c418cSBarry Smith     }
2345ed1c418cSBarry Smith     y[18 * i]      = sum1;
2346ed1c418cSBarry Smith     y[18 * i + 1]  = sum2;
2347ed1c418cSBarry Smith     y[18 * i + 2]  = sum3;
2348ed1c418cSBarry Smith     y[18 * i + 3]  = sum4;
2349ed1c418cSBarry Smith     y[18 * i + 4]  = sum5;
2350ed1c418cSBarry Smith     y[18 * i + 5]  = sum6;
2351ed1c418cSBarry Smith     y[18 * i + 6]  = sum7;
2352ed1c418cSBarry Smith     y[18 * i + 7]  = sum8;
2353ed1c418cSBarry Smith     y[18 * i + 8]  = sum9;
2354ed1c418cSBarry Smith     y[18 * i + 9]  = sum10;
2355ed1c418cSBarry Smith     y[18 * i + 10] = sum11;
2356ed1c418cSBarry Smith     y[18 * i + 11] = sum12;
2357ed1c418cSBarry Smith     y[18 * i + 12] = sum13;
2358ed1c418cSBarry Smith     y[18 * i + 13] = sum14;
2359ed1c418cSBarry Smith     y[18 * i + 14] = sum15;
2360ed1c418cSBarry Smith     y[18 * i + 15] = sum16;
2361ed1c418cSBarry Smith     y[18 * i + 16] = sum17;
2362ed1c418cSBarry Smith     y[18 * i + 17] = sum18;
2363ed1c418cSBarry Smith   }
2364ed1c418cSBarry Smith 
23659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
23669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
23679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2368ed1c418cSBarry Smith   PetscFunctionReturn(0);
2369ed1c418cSBarry Smith }
2370ed1c418cSBarry Smith 
23719371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) {
2372ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2373ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2374d2840e09SBarry Smith   const PetscScalar *x, *v;
2375d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2376ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2377d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2378d2840e09SBarry Smith   PetscInt           n, i;
2379ed1c418cSBarry Smith 
2380ed1c418cSBarry Smith   PetscFunctionBegin;
23819566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
23829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
23839566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2384ed1c418cSBarry Smith 
2385ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2386ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2387ed1c418cSBarry Smith     v       = a->a + a->i[i];
2388ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2389ed1c418cSBarry Smith     alpha1  = x[18 * i];
2390ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2391ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2392ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2393ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2394ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2395ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2396ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2397ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2398ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2399ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2400ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2401ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2402ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2403ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2404ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2405ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2406ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2407ed1c418cSBarry Smith     while (n-- > 0) {
2408ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2409ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2410ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2411ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2412ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2413ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2414ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2415ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2416ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2417ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2418ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2419ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2420ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2421ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2422ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2423ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2424ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2425ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
24269371c9d4SSatish Balay       idx++;
24279371c9d4SSatish Balay       v++;
2428ed1c418cSBarry Smith     }
2429ed1c418cSBarry Smith   }
24309566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
24319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
24329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2433ed1c418cSBarry Smith   PetscFunctionReturn(0);
2434ed1c418cSBarry Smith }
2435ed1c418cSBarry Smith 
24369371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2437ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2438ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2439d2840e09SBarry Smith   const PetscScalar *x, *v;
2440d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2441ed1c418cSBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2442d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2443ed1c418cSBarry Smith   PetscInt           n, i, jrow, j;
2444ed1c418cSBarry Smith 
2445ed1c418cSBarry Smith   PetscFunctionBegin;
24469566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
24479566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
24489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2449ed1c418cSBarry Smith   idx = a->j;
2450ed1c418cSBarry Smith   v   = a->a;
2451ed1c418cSBarry Smith   ii  = a->i;
2452ed1c418cSBarry Smith 
2453ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2454ed1c418cSBarry Smith     jrow  = ii[i];
2455ed1c418cSBarry Smith     n     = ii[i + 1] - jrow;
2456ed1c418cSBarry Smith     sum1  = 0.0;
2457ed1c418cSBarry Smith     sum2  = 0.0;
2458ed1c418cSBarry Smith     sum3  = 0.0;
2459ed1c418cSBarry Smith     sum4  = 0.0;
2460ed1c418cSBarry Smith     sum5  = 0.0;
2461ed1c418cSBarry Smith     sum6  = 0.0;
2462ed1c418cSBarry Smith     sum7  = 0.0;
2463ed1c418cSBarry Smith     sum8  = 0.0;
2464ed1c418cSBarry Smith     sum9  = 0.0;
2465ed1c418cSBarry Smith     sum10 = 0.0;
2466ed1c418cSBarry Smith     sum11 = 0.0;
2467ed1c418cSBarry Smith     sum12 = 0.0;
2468ed1c418cSBarry Smith     sum13 = 0.0;
2469ed1c418cSBarry Smith     sum14 = 0.0;
2470ed1c418cSBarry Smith     sum15 = 0.0;
2471ed1c418cSBarry Smith     sum16 = 0.0;
2472ed1c418cSBarry Smith     sum17 = 0.0;
2473ed1c418cSBarry Smith     sum18 = 0.0;
2474ed1c418cSBarry Smith     for (j = 0; j < n; j++) {
2475ed1c418cSBarry Smith       sum1 += v[jrow] * x[18 * idx[jrow]];
2476ed1c418cSBarry Smith       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2477ed1c418cSBarry Smith       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2478ed1c418cSBarry Smith       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2479ed1c418cSBarry Smith       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2480ed1c418cSBarry Smith       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2481ed1c418cSBarry Smith       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2482ed1c418cSBarry Smith       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2483ed1c418cSBarry Smith       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2484ed1c418cSBarry Smith       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2485ed1c418cSBarry Smith       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2486ed1c418cSBarry Smith       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2487ed1c418cSBarry Smith       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2488ed1c418cSBarry Smith       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2489ed1c418cSBarry Smith       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2490ed1c418cSBarry Smith       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2491ed1c418cSBarry Smith       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2492ed1c418cSBarry Smith       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2493ed1c418cSBarry Smith       jrow++;
2494ed1c418cSBarry Smith     }
2495ed1c418cSBarry Smith     y[18 * i] += sum1;
2496ed1c418cSBarry Smith     y[18 * i + 1] += sum2;
2497ed1c418cSBarry Smith     y[18 * i + 2] += sum3;
2498ed1c418cSBarry Smith     y[18 * i + 3] += sum4;
2499ed1c418cSBarry Smith     y[18 * i + 4] += sum5;
2500ed1c418cSBarry Smith     y[18 * i + 5] += sum6;
2501ed1c418cSBarry Smith     y[18 * i + 6] += sum7;
2502ed1c418cSBarry Smith     y[18 * i + 7] += sum8;
2503ed1c418cSBarry Smith     y[18 * i + 8] += sum9;
2504ed1c418cSBarry Smith     y[18 * i + 9] += sum10;
2505ed1c418cSBarry Smith     y[18 * i + 10] += sum11;
2506ed1c418cSBarry Smith     y[18 * i + 11] += sum12;
2507ed1c418cSBarry Smith     y[18 * i + 12] += sum13;
2508ed1c418cSBarry Smith     y[18 * i + 13] += sum14;
2509ed1c418cSBarry Smith     y[18 * i + 14] += sum15;
2510ed1c418cSBarry Smith     y[18 * i + 15] += sum16;
2511ed1c418cSBarry Smith     y[18 * i + 16] += sum17;
2512ed1c418cSBarry Smith     y[18 * i + 17] += sum18;
2513ed1c418cSBarry Smith   }
2514ed1c418cSBarry Smith 
25159566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
25169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2518ed1c418cSBarry Smith   PetscFunctionReturn(0);
2519ed1c418cSBarry Smith }
2520ed1c418cSBarry Smith 
25219371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) {
2522ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2523ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2524d2840e09SBarry Smith   const PetscScalar *x, *v;
2525d2840e09SBarry Smith   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2526ed1c418cSBarry Smith   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2527d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx;
2528d2840e09SBarry Smith   PetscInt           n, i;
2529ed1c418cSBarry Smith 
2530ed1c418cSBarry Smith   PetscFunctionBegin;
25319566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
25329566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25339566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
2534ed1c418cSBarry Smith   for (i = 0; i < m; i++) {
2535ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2536ed1c418cSBarry Smith     v       = a->a + a->i[i];
2537ed1c418cSBarry Smith     n       = a->i[i + 1] - a->i[i];
2538ed1c418cSBarry Smith     alpha1  = x[18 * i];
2539ed1c418cSBarry Smith     alpha2  = x[18 * i + 1];
2540ed1c418cSBarry Smith     alpha3  = x[18 * i + 2];
2541ed1c418cSBarry Smith     alpha4  = x[18 * i + 3];
2542ed1c418cSBarry Smith     alpha5  = x[18 * i + 4];
2543ed1c418cSBarry Smith     alpha6  = x[18 * i + 5];
2544ed1c418cSBarry Smith     alpha7  = x[18 * i + 6];
2545ed1c418cSBarry Smith     alpha8  = x[18 * i + 7];
2546ed1c418cSBarry Smith     alpha9  = x[18 * i + 8];
2547ed1c418cSBarry Smith     alpha10 = x[18 * i + 9];
2548ed1c418cSBarry Smith     alpha11 = x[18 * i + 10];
2549ed1c418cSBarry Smith     alpha12 = x[18 * i + 11];
2550ed1c418cSBarry Smith     alpha13 = x[18 * i + 12];
2551ed1c418cSBarry Smith     alpha14 = x[18 * i + 13];
2552ed1c418cSBarry Smith     alpha15 = x[18 * i + 14];
2553ed1c418cSBarry Smith     alpha16 = x[18 * i + 15];
2554ed1c418cSBarry Smith     alpha17 = x[18 * i + 16];
2555ed1c418cSBarry Smith     alpha18 = x[18 * i + 17];
2556ed1c418cSBarry Smith     while (n-- > 0) {
2557ed1c418cSBarry Smith       y[18 * (*idx)] += alpha1 * (*v);
2558ed1c418cSBarry Smith       y[18 * (*idx) + 1] += alpha2 * (*v);
2559ed1c418cSBarry Smith       y[18 * (*idx) + 2] += alpha3 * (*v);
2560ed1c418cSBarry Smith       y[18 * (*idx) + 3] += alpha4 * (*v);
2561ed1c418cSBarry Smith       y[18 * (*idx) + 4] += alpha5 * (*v);
2562ed1c418cSBarry Smith       y[18 * (*idx) + 5] += alpha6 * (*v);
2563ed1c418cSBarry Smith       y[18 * (*idx) + 6] += alpha7 * (*v);
2564ed1c418cSBarry Smith       y[18 * (*idx) + 7] += alpha8 * (*v);
2565ed1c418cSBarry Smith       y[18 * (*idx) + 8] += alpha9 * (*v);
2566ed1c418cSBarry Smith       y[18 * (*idx) + 9] += alpha10 * (*v);
2567ed1c418cSBarry Smith       y[18 * (*idx) + 10] += alpha11 * (*v);
2568ed1c418cSBarry Smith       y[18 * (*idx) + 11] += alpha12 * (*v);
2569ed1c418cSBarry Smith       y[18 * (*idx) + 12] += alpha13 * (*v);
2570ed1c418cSBarry Smith       y[18 * (*idx) + 13] += alpha14 * (*v);
2571ed1c418cSBarry Smith       y[18 * (*idx) + 14] += alpha15 * (*v);
2572ed1c418cSBarry Smith       y[18 * (*idx) + 15] += alpha16 * (*v);
2573ed1c418cSBarry Smith       y[18 * (*idx) + 16] += alpha17 * (*v);
2574ed1c418cSBarry Smith       y[18 * (*idx) + 17] += alpha18 * (*v);
25759371c9d4SSatish Balay       idx++;
25769371c9d4SSatish Balay       v++;
2577ed1c418cSBarry Smith     }
2578ed1c418cSBarry Smith   }
25799566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(36.0 * a->nz));
25809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
25819566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
2582ed1c418cSBarry Smith   PetscFunctionReturn(0);
2583ed1c418cSBarry Smith }
2584ed1c418cSBarry Smith 
25859371c9d4SSatish Balay PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
25866861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
25876861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
25886861f193SBarry Smith   const PetscScalar *x, *v;
25896861f193SBarry Smith   PetscScalar       *y, *sums;
25906861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
25916861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
25926861f193SBarry Smith 
25936861f193SBarry Smith   PetscFunctionBegin;
25949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
25959566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
25969566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
25976861f193SBarry Smith   idx = a->j;
25986861f193SBarry Smith   v   = a->a;
25996861f193SBarry Smith   ii  = a->i;
26006861f193SBarry Smith 
26016861f193SBarry Smith   for (i = 0; i < m; i++) {
26026861f193SBarry Smith     jrow = ii[i];
26036861f193SBarry Smith     n    = ii[i + 1] - jrow;
26046861f193SBarry Smith     sums = y + dof * i;
26056861f193SBarry Smith     for (j = 0; j < n; j++) {
26069371c9d4SSatish Balay       for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; }
26076861f193SBarry Smith       jrow++;
26086861f193SBarry Smith     }
26096861f193SBarry Smith   }
26106861f193SBarry Smith 
26119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26129566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
26146861f193SBarry Smith   PetscFunctionReturn(0);
26156861f193SBarry Smith }
26166861f193SBarry Smith 
26179371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26186861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26196861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26206861f193SBarry Smith   const PetscScalar *x, *v;
26216861f193SBarry Smith   PetscScalar       *y, *sums;
26226861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
26236861f193SBarry Smith   PetscInt           n, i, jrow, j, dof = b->dof, k;
26246861f193SBarry Smith 
26256861f193SBarry Smith   PetscFunctionBegin;
26269566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26279566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26289566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26296861f193SBarry Smith   idx = a->j;
26306861f193SBarry Smith   v   = a->a;
26316861f193SBarry Smith   ii  = a->i;
26326861f193SBarry Smith 
26336861f193SBarry Smith   for (i = 0; i < m; i++) {
26346861f193SBarry Smith     jrow = ii[i];
26356861f193SBarry Smith     n    = ii[i + 1] - jrow;
26366861f193SBarry Smith     sums = y + dof * i;
26376861f193SBarry Smith     for (j = 0; j < n; j++) {
26389371c9d4SSatish Balay       for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; }
26396861f193SBarry Smith       jrow++;
26406861f193SBarry Smith     }
26416861f193SBarry Smith   }
26426861f193SBarry Smith 
26439566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
26466861f193SBarry Smith   PetscFunctionReturn(0);
26476861f193SBarry Smith }
26486861f193SBarry Smith 
26499371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) {
26506861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26516861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26526861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26536861f193SBarry Smith   PetscScalar       *y;
26546861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26556861f193SBarry Smith   PetscInt           n, i, k;
26566861f193SBarry Smith 
26576861f193SBarry Smith   PetscFunctionBegin;
26589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26599566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
26609566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
26616861f193SBarry Smith   for (i = 0; i < m; i++) {
26626861f193SBarry Smith     idx   = a->j + a->i[i];
26636861f193SBarry Smith     v     = a->a + a->i[i];
26646861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26656861f193SBarry Smith     alpha = x + dof * i;
26666861f193SBarry Smith     while (n-- > 0) {
26679371c9d4SSatish Balay       for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); }
26689371c9d4SSatish Balay       idx++;
26699371c9d4SSatish Balay       v++;
26706861f193SBarry Smith     }
26716861f193SBarry Smith   }
26729566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
26739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
26749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
26756861f193SBarry Smith   PetscFunctionReturn(0);
26766861f193SBarry Smith }
26776861f193SBarry Smith 
26789371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) {
26796861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
26806861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
26816861f193SBarry Smith   const PetscScalar *x, *v, *alpha;
26826861f193SBarry Smith   PetscScalar       *y;
26836861f193SBarry Smith   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
26846861f193SBarry Smith   PetscInt           n, i, k;
26856861f193SBarry Smith 
26866861f193SBarry Smith   PetscFunctionBegin;
26879566063dSJacob Faibussowitsch   if (yy != zz) PetscCall(VecCopy(yy, zz));
26889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
26899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(zz, &y));
26906861f193SBarry Smith   for (i = 0; i < m; i++) {
26916861f193SBarry Smith     idx   = a->j + a->i[i];
26926861f193SBarry Smith     v     = a->a + a->i[i];
26936861f193SBarry Smith     n     = a->i[i + 1] - a->i[i];
26946861f193SBarry Smith     alpha = x + dof * i;
26956861f193SBarry Smith     while (n-- > 0) {
26969371c9d4SSatish Balay       for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); }
26979371c9d4SSatish Balay       idx++;
26989371c9d4SSatish Balay       v++;
26996861f193SBarry Smith     }
27006861f193SBarry Smith   }
27019566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
27029566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
27039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(zz, &y));
27046861f193SBarry Smith   PetscFunctionReturn(0);
27056861f193SBarry Smith }
27066861f193SBarry Smith 
2707f4a53059SBarry Smith /*===================================================================================*/
27089371c9d4SSatish Balay PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
27094cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2710f4a53059SBarry Smith 
2711b24ad042SBarry Smith   PetscFunctionBegin;
2712f4a53059SBarry Smith   /* start the scatter */
27139566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27149566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
27159566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27169566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2717f4a53059SBarry Smith   PetscFunctionReturn(0);
2718f4a53059SBarry Smith }
2719f4a53059SBarry Smith 
27209371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) {
27214cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2722b24ad042SBarry Smith 
27234cb9d9b8SBarry Smith   PetscFunctionBegin;
27249566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27259566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
27269566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27279566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
27284cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27294cb9d9b8SBarry Smith }
27304cb9d9b8SBarry Smith 
27319371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27324cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
27334cb9d9b8SBarry Smith 
2734b24ad042SBarry Smith   PetscFunctionBegin;
27354cb9d9b8SBarry Smith   /* start the scatter */
27369566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27379566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
27389566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
27399566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
27404cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27414cb9d9b8SBarry Smith }
27424cb9d9b8SBarry Smith 
27439371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) {
27444cb9d9b8SBarry Smith   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2745b24ad042SBarry Smith 
27464cb9d9b8SBarry Smith   PetscFunctionBegin;
27479566063dSJacob Faibussowitsch   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
27489566063dSJacob Faibussowitsch   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
27499566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27509566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
27514cb9d9b8SBarry Smith   PetscFunctionReturn(0);
27524cb9d9b8SBarry Smith }
27534cb9d9b8SBarry Smith 
275495ddefa2SHong Zhang /* ----------------------------------------------------------------*/
27559371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) {
27564222ddf1SHong Zhang   Mat_Product *product = C->product;
27574222ddf1SHong Zhang 
27584222ddf1SHong Zhang   PetscFunctionBegin;
27594222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
27604222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
276198921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
27624222ddf1SHong Zhang   PetscFunctionReturn(0);
27634222ddf1SHong Zhang }
27644222ddf1SHong Zhang 
27659371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) {
27664222ddf1SHong Zhang   Mat_Product *product = C->product;
27674222ddf1SHong Zhang   PetscBool    flg     = PETSC_FALSE;
27684222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
27694222ddf1SHong Zhang   PetscInt     alg = 1; /* set default algorithm */
27704222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
27714222ddf1SHong Zhang   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
27724222ddf1SHong Zhang   PetscInt    nalg        = 4;
27734222ddf1SHong Zhang #else
27744222ddf1SHong Zhang   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
27754222ddf1SHong Zhang   PetscInt    nalg        = 5;
27764222ddf1SHong Zhang #endif
27774222ddf1SHong Zhang 
27784222ddf1SHong Zhang   PetscFunctionBegin;
277908401ef6SPierre Jolivet   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
27804222ddf1SHong Zhang 
27814222ddf1SHong Zhang   /* PtAP */
27824222ddf1SHong Zhang   /* Check matrix local sizes */
27839371c9d4SSatish Balay   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27849371c9d4SSatish Balay              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
27859371c9d4SSatish Balay   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
27869371c9d4SSatish Balay              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
27874222ddf1SHong Zhang 
27884222ddf1SHong Zhang   /* Set the default algorithm */
27899566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2790*48a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
27914222ddf1SHong Zhang 
27924222ddf1SHong Zhang   /* Get runtime option */
2793d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
27949566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
2795*48a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2796d0609cedSBarry Smith   PetscOptionsEnd();
27974222ddf1SHong Zhang 
27989566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
27994222ddf1SHong Zhang   if (flg) {
28004222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28014222ddf1SHong Zhang     PetscFunctionReturn(0);
28024222ddf1SHong Zhang   }
28034222ddf1SHong Zhang 
28049566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
28054222ddf1SHong Zhang   if (flg) {
28064222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
28074222ddf1SHong Zhang     PetscFunctionReturn(0);
28084222ddf1SHong Zhang   }
28094222ddf1SHong Zhang 
28104222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
28119566063dSJacob Faibussowitsch   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
28129566063dSJacob Faibussowitsch   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
28139566063dSJacob Faibussowitsch   PetscCall(MatProductSetFromOptions(C));
28144222ddf1SHong Zhang   PetscFunctionReturn(0);
28154222ddf1SHong Zhang }
28164222ddf1SHong Zhang 
28174222ddf1SHong Zhang /* ----------------------------------------------------------------*/
28189371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) {
28190298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
28207ba1a0bfSKris Buschelman   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
28217ba1a0bfSKris Buschelman   Mat                P  = pp->AIJ;
28227ba1a0bfSKris Buschelman   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2823d2840e09SBarry Smith   PetscInt          *pti, *ptj, *ptJ;
28247ba1a0bfSKris Buschelman   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2825d2840e09SBarry Smith   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2826d2840e09SBarry Smith   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
28277ba1a0bfSKris Buschelman   MatScalar         *ca;
2828d2840e09SBarry Smith   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
28297ba1a0bfSKris Buschelman 
28307ba1a0bfSKris Buschelman   PetscFunctionBegin;
28317ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28329566063dSJacob Faibussowitsch   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
28337ba1a0bfSKris Buschelman 
28347ba1a0bfSKris Buschelman   cn = pn * ppdof;
28357ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28367ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
28379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(cn + 1, &ci));
28387ba1a0bfSKris Buschelman   ci[0] = 0;
28397ba1a0bfSKris Buschelman 
28407ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
28419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
28429566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ptadenserow, an));
28439566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(denserow, cn));
28447ba1a0bfSKris Buschelman 
28457ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28467ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28477ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
28489566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
28497ba1a0bfSKris Buschelman   current_space = free_space;
28507ba1a0bfSKris Buschelman 
28517ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
28527ba1a0bfSKris Buschelman   for (i = 0; i < pn; i++) {
28537ba1a0bfSKris Buschelman     ptnzi = pti[i + 1] - pti[i];
28547ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
28557ba1a0bfSKris Buschelman     for (dof = 0; dof < ppdof; dof++) {
28567ba1a0bfSKris Buschelman       ptanzi = 0;
28577ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
28587ba1a0bfSKris Buschelman       for (j = 0; j < ptnzi; j++) {
28597ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
28607ba1a0bfSKris Buschelman         arow = ptJ[j] * ppdof + dof;
28617ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
28627ba1a0bfSKris Buschelman         anzj = ai[arow + 1] - ai[arow];
28637ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
28647ba1a0bfSKris Buschelman         for (k = 0; k < anzj; k++) {
28657ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
28667ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
28677ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
28687ba1a0bfSKris Buschelman           }
28697ba1a0bfSKris Buschelman         }
28707ba1a0bfSKris Buschelman       }
28717ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
28727ba1a0bfSKris Buschelman       ptaj = ptasparserow;
28737ba1a0bfSKris Buschelman       cnzi = 0;
28747ba1a0bfSKris Buschelman       for (j = 0; j < ptanzi; j++) {
28757ba1a0bfSKris Buschelman         /* Get offset within block of P */
28767ba1a0bfSKris Buschelman         pshift = *ptaj % ppdof;
28777ba1a0bfSKris Buschelman         /* Get block row of P */
28787ba1a0bfSKris Buschelman         prow   = (*ptaj++) / ppdof; /* integer division */
28797ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
28807ba1a0bfSKris Buschelman         pnzj   = pi[prow + 1] - pi[prow];
28817ba1a0bfSKris Buschelman         pjj    = pj + pi[prow];
28827ba1a0bfSKris Buschelman         for (k = 0; k < pnzj; k++) {
28837ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
28847ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
28857ba1a0bfSKris Buschelman           if (!denserow[pjj[k] * ppdof + pshift]) {
28867ba1a0bfSKris Buschelman             denserow[pjj[k] * ppdof + pshift] = -1;
28877ba1a0bfSKris Buschelman             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
28887ba1a0bfSKris Buschelman           }
28897ba1a0bfSKris Buschelman         }
28907ba1a0bfSKris Buschelman       }
28917ba1a0bfSKris Buschelman 
28927ba1a0bfSKris Buschelman       /* sort sparserow */
28939566063dSJacob Faibussowitsch       PetscCall(PetscSortInt(cnzi, sparserow));
28947ba1a0bfSKris Buschelman 
28957ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
28967ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
2897*48a46eb9SPierre Jolivet       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
28987ba1a0bfSKris Buschelman 
28997ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29009566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
290126fbe8dcSKarl Rupp 
29027ba1a0bfSKris Buschelman       current_space->array += cnzi;
29037ba1a0bfSKris Buschelman       current_space->local_used += cnzi;
29047ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29057ba1a0bfSKris Buschelman 
290626fbe8dcSKarl Rupp       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
290726fbe8dcSKarl Rupp       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
290826fbe8dcSKarl Rupp 
29097ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29107ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29117ba1a0bfSKris Buschelman       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
29127ba1a0bfSKris Buschelman     }
29137ba1a0bfSKris Buschelman   }
29147ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29157ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29167ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
29179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
29189566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
29199566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
29207ba1a0bfSKris Buschelman 
29217ba1a0bfSKris Buschelman   /* Allocate space for ca */
29229566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
29237ba1a0bfSKris Buschelman 
29247ba1a0bfSKris Buschelman   /* put together the new matrix */
29259566063dSJacob Faibussowitsch   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
29269566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(C, pp->dof));
29277ba1a0bfSKris Buschelman 
29287ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29297ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29304222ddf1SHong Zhang   c          = (Mat_SeqAIJ *)(C->data);
2931e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2932e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29337ba1a0bfSKris Buschelman   c->nonew   = 0;
293426fbe8dcSKarl Rupp 
29354222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29364222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
29377ba1a0bfSKris Buschelman 
29387ba1a0bfSKris Buschelman   /* Clean up. */
29399566063dSJacob Faibussowitsch   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
29407ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29417ba1a0bfSKris Buschelman }
29427ba1a0bfSKris Buschelman 
29439371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) {
29447ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29457ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
29467ba1a0bfSKris Buschelman   Mat              P  = pp->AIJ;
29477ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
29487ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
29497ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
2950a2ea699eSBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2951d2840e09SBarry Smith   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
2952d2840e09SBarry Smith   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2953d2840e09SBarry Smith   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2954a2ea699eSBarry Smith   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2955d2840e09SBarry Smith   MatScalar       *ca = c->a, *caj, *apa;
29567ba1a0bfSKris Buschelman 
29577ba1a0bfSKris Buschelman   PetscFunctionBegin;
29587ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
29599566063dSJacob Faibussowitsch   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
29607ba1a0bfSKris Buschelman 
29617ba1a0bfSKris Buschelman   /* Clear old values in C */
29629566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(ca, ci[cm]));
29637ba1a0bfSKris Buschelman 
29647ba1a0bfSKris Buschelman   for (i = 0; i < am; i++) {
29657ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
29667ba1a0bfSKris Buschelman     anzi  = ai[i + 1] - ai[i];
29677ba1a0bfSKris Buschelman     apnzj = 0;
29687ba1a0bfSKris Buschelman     for (j = 0; j < anzi; j++) {
29697ba1a0bfSKris Buschelman       /* Get offset within block of P */
29707ba1a0bfSKris Buschelman       pshift = *aj % ppdof;
29717ba1a0bfSKris Buschelman       /* Get block row of P */
29727ba1a0bfSKris Buschelman       prow   = *aj++ / ppdof; /* integer division */
29737ba1a0bfSKris Buschelman       pnzj   = pi[prow + 1] - pi[prow];
29747ba1a0bfSKris Buschelman       pjj    = pj + pi[prow];
29757ba1a0bfSKris Buschelman       paj    = pa + pi[prow];
29767ba1a0bfSKris Buschelman       for (k = 0; k < pnzj; k++) {
29777ba1a0bfSKris Buschelman         poffset = pjj[k] * ppdof + pshift;
29787ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
29797ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
29807ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
29817ba1a0bfSKris Buschelman         }
29827ba1a0bfSKris Buschelman         apa[poffset] += (*aa) * paj[k];
29837ba1a0bfSKris Buschelman       }
29849566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * pnzj));
29857ba1a0bfSKris Buschelman       aa++;
29867ba1a0bfSKris Buschelman     }
29877ba1a0bfSKris Buschelman 
29887ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
29897ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
29909566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(apnzj, apj));
29917ba1a0bfSKris Buschelman 
29927ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
29937ba1a0bfSKris Buschelman     prow    = i / ppdof; /* integer division */
29947ba1a0bfSKris Buschelman     pshift  = i % ppdof;
29957ba1a0bfSKris Buschelman     poffset = pi[prow];
29967ba1a0bfSKris Buschelman     pnzi    = pi[prow + 1] - poffset;
29977ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
29987ba1a0bfSKris Buschelman     pJ      = pj + poffset;
29997ba1a0bfSKris Buschelman     pA      = pa + poffset;
30007ba1a0bfSKris Buschelman     for (j = 0; j < pnzi; j++) {
30017ba1a0bfSKris Buschelman       crow = (*pJ) * ppdof + pshift;
30027ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30037ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30047ba1a0bfSKris Buschelman       pJ++;
30057ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30067ba1a0bfSKris Buschelman       for (k = 0, nextap = 0; nextap < apnzj; k++) {
300726fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
30087ba1a0bfSKris Buschelman       }
30099566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * apnzj));
30107ba1a0bfSKris Buschelman       pA++;
30117ba1a0bfSKris Buschelman     }
30127ba1a0bfSKris Buschelman 
30137ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30147ba1a0bfSKris Buschelman     for (j = 0; j < apnzj; j++) {
30157ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30167ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30177ba1a0bfSKris Buschelman     }
30187ba1a0bfSKris Buschelman   }
30197ba1a0bfSKris Buschelman 
30207ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
30229566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
30239566063dSJacob Faibussowitsch   PetscCall(PetscFree3(apa, apj, apjdense));
302495ddefa2SHong Zhang   PetscFunctionReturn(0);
302595ddefa2SHong Zhang }
30267ba1a0bfSKris Buschelman 
30279371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) {
30284222ddf1SHong Zhang   Mat_Product *product = C->product;
30294222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30302121bac1SHong Zhang 
30312121bac1SHong Zhang   PetscFunctionBegin;
30329566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
30332121bac1SHong Zhang   PetscFunctionReturn(0);
30342121bac1SHong Zhang }
30352121bac1SHong Zhang 
30369371c9d4SSatish Balay PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) {
303795ddefa2SHong Zhang   PetscFunctionBegin;
30383e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
303995ddefa2SHong Zhang }
304095ddefa2SHong Zhang 
30419371c9d4SSatish Balay PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) {
304295ddefa2SHong Zhang   PetscFunctionBegin;
30433e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
30447ba1a0bfSKris Buschelman }
30455c65b9ecSFande Kong 
3046bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3047bc8e477aSFande Kong 
30489371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) {
3049bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3050bc8e477aSFande Kong 
3051bc8e477aSFande Kong   PetscFunctionBegin;
305234bcad68SFande Kong 
30539566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3054bc8e477aSFande Kong   PetscFunctionReturn(0);
3055bc8e477aSFande Kong }
3056bc8e477aSFande Kong 
30574222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3058bc8e477aSFande Kong 
30599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) {
3060bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3061bc8e477aSFande Kong 
3062bc8e477aSFande Kong   PetscFunctionBegin;
30639566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
30644222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3065bc8e477aSFande Kong   PetscFunctionReturn(0);
3066bc8e477aSFande Kong }
3067bc8e477aSFande Kong 
3068bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3069bc8e477aSFande Kong 
30709371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) {
3071bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3072bc8e477aSFande Kong 
3073bc8e477aSFande Kong   PetscFunctionBegin;
307434bcad68SFande Kong 
30759566063dSJacob Faibussowitsch   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3076bc8e477aSFande Kong   PetscFunctionReturn(0);
3077bc8e477aSFande Kong }
3078bc8e477aSFande Kong 
30794222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3080bc8e477aSFande Kong 
30819371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) {
3082bc8e477aSFande Kong   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3083bc8e477aSFande Kong 
3084bc8e477aSFande Kong   PetscFunctionBegin;
308534bcad68SFande Kong 
30869566063dSJacob Faibussowitsch   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
30874222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3088bc8e477aSFande Kong   PetscFunctionReturn(0);
3089bc8e477aSFande Kong }
3090bc8e477aSFande Kong 
30919371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) {
30924222ddf1SHong Zhang   Mat_Product *product = C->product;
30934222ddf1SHong Zhang   Mat          A = product->A, P = product->B;
30944222ddf1SHong Zhang   PetscBool    flg;
3095bc8e477aSFande Kong 
3096bc8e477aSFande Kong   PetscFunctionBegin;
30979566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
30984222ddf1SHong Zhang   if (flg) {
30999566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
31004222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31014222ddf1SHong Zhang     PetscFunctionReturn(0);
3102bc8e477aSFande Kong   }
310334bcad68SFande Kong 
31049566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
31054222ddf1SHong Zhang   if (flg) {
31069566063dSJacob Faibussowitsch     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
31074222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
31084222ddf1SHong Zhang     PetscFunctionReturn(0);
31094222ddf1SHong Zhang   }
311034bcad68SFande Kong 
31114222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3112bc8e477aSFande Kong }
3113bc8e477aSFande Kong 
31149371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
31159c4fc4c7SBarry Smith   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3116ceb03754SKris Buschelman   Mat          a   = b->AIJ, B;
31179c4fc4c7SBarry Smith   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
31180fd73130SBarry Smith   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3119ba8c8a56SBarry Smith   PetscInt    *cols;
3120ba8c8a56SBarry Smith   PetscScalar *vals;
31219c4fc4c7SBarry Smith 
31229c4fc4c7SBarry Smith   PetscFunctionBegin;
31239566063dSJacob Faibussowitsch   PetscCall(MatGetSize(a, &m, &n));
31249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(dof * m, &ilen));
31259c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31269c4fc4c7SBarry Smith     nmax = PetscMax(nmax, aij->ilen[i]);
312726fbe8dcSKarl Rupp     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
31289c4fc4c7SBarry Smith   }
31299566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
31309566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
31319566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31329566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
31339566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilen));
31349566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmax, &icols));
31359c4fc4c7SBarry Smith   ii = 0;
31369c4fc4c7SBarry Smith   for (i = 0; i < m; i++) {
31379566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31380fd73130SBarry Smith     for (j = 0; j < dof; j++) {
313926fbe8dcSKarl Rupp       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
31409566063dSJacob Faibussowitsch       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
31419c4fc4c7SBarry Smith       ii++;
31429c4fc4c7SBarry Smith     }
31439566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
31449c4fc4c7SBarry Smith   }
31459566063dSJacob Faibussowitsch   PetscCall(PetscFree(icols));
31469566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
31479566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3148ceb03754SKris Buschelman 
3149511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
31509566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
3151c3d102feSKris Buschelman   } else {
3152ceb03754SKris Buschelman     *newmat = B;
3153c3d102feSKris Buschelman   }
31549c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31559c4fc4c7SBarry Smith }
31569c4fc4c7SBarry Smith 
3157c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3158be1d678aSKris Buschelman 
31599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
31600fd73130SBarry Smith   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3161ceb03754SKris Buschelman   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
31620fd73130SBarry Smith   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
31630fd73130SBarry Smith   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
31640fd73130SBarry Smith   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
31650fd73130SBarry Smith   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
31660298fd71SBarry Smith   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
31670298fd71SBarry Smith   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
31680fd73130SBarry Smith   PetscInt     rstart, cstart, *garray, ii, k;
31690fd73130SBarry Smith   PetscScalar *vals, *ovals;
31700fd73130SBarry Smith 
31710fd73130SBarry Smith   PetscFunctionBegin;
31729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3173d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31740fd73130SBarry Smith     nmax  = PetscMax(nmax, AIJ->ilen[i]);
31750fd73130SBarry Smith     onmax = PetscMax(onmax, OAIJ->ilen[i]);
31760fd73130SBarry Smith     for (j = 0; j < dof; j++) {
31770fd73130SBarry Smith       dnz[dof * i + j] = AIJ->ilen[i];
31780fd73130SBarry Smith       onz[dof * i + j] = OAIJ->ilen[i];
31790fd73130SBarry Smith     }
31800fd73130SBarry Smith   }
31819566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
31829566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
31839566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, newtype));
31849566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
31859566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, dof));
31869566063dSJacob Faibussowitsch   PetscCall(PetscFree2(dnz, onz));
31870fd73130SBarry Smith 
31889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3189d0f46423SBarry Smith   rstart = dof * maij->A->rmap->rstart;
3190d0f46423SBarry Smith   cstart = dof * maij->A->cmap->rstart;
31910fd73130SBarry Smith   garray = mpiaij->garray;
31920fd73130SBarry Smith 
31930fd73130SBarry Smith   ii = rstart;
3194d0f46423SBarry Smith   for (i = 0; i < A->rmap->n / dof; i++) {
31959566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
31969566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
31970fd73130SBarry Smith     for (j = 0; j < dof; j++) {
31989371c9d4SSatish Balay       for (k = 0; k < ncols; k++) { icols[k] = cstart + dof * cols[k] + j; }
31999371c9d4SSatish Balay       for (k = 0; k < oncols; k++) { oicols[k] = dof * garray[ocols[k]] + j; }
32009566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
32019566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
32020fd73130SBarry Smith       ii++;
32030fd73130SBarry Smith     }
32049566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
32059566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
32060fd73130SBarry Smith   }
32079566063dSJacob Faibussowitsch   PetscCall(PetscFree2(icols, oicols));
32080fd73130SBarry Smith 
32099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
32109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3211ceb03754SKris Buschelman 
3212511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32137adad957SLisandro Dalcin     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32147adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
321526fbe8dcSKarl Rupp 
32169566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
321726fbe8dcSKarl Rupp 
32187adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3219c3d102feSKris Buschelman   } else {
3220ceb03754SKris Buschelman     *newmat = B;
3221c3d102feSKris Buschelman   }
32220fd73130SBarry Smith   PetscFunctionReturn(0);
32230fd73130SBarry Smith }
32240fd73130SBarry Smith 
32259371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
32269e516c8fSBarry Smith   Mat A;
32279e516c8fSBarry Smith 
32289e516c8fSBarry Smith   PetscFunctionBegin;
32299566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32309566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
32319566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
32329e516c8fSBarry Smith   PetscFunctionReturn(0);
32339e516c8fSBarry Smith }
32340fd73130SBarry Smith 
32359371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) {
3236ec626240SStefano Zampini   Mat A;
3237ec626240SStefano Zampini 
3238ec626240SStefano Zampini   PetscFunctionBegin;
32399566063dSJacob Faibussowitsch   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
32409566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
32419566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3242ec626240SStefano Zampini   PetscFunctionReturn(0);
3243ec626240SStefano Zampini }
3244ec626240SStefano Zampini 
3245bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3246480dffcfSJed Brown /*@
32470bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32480bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
32490bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
32500bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
32510bad9183SKris Buschelman 
3252ff585edeSJed Brown   Collective
3253ff585edeSJed Brown 
3254ff585edeSJed Brown   Input Parameters:
3255ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3256ff585edeSJed Brown - dof - the block size (number of components per node)
3257ff585edeSJed Brown 
3258ff585edeSJed Brown   Output Parameter:
3259ff585edeSJed Brown . maij - the new MAIJ matrix
3260ff585edeSJed Brown 
32610bad9183SKris Buschelman   Operations provided:
326267b8a455SSatish Balay .vb
326367b8a455SSatish Balay     MatMult
326467b8a455SSatish Balay     MatMultTranspose
326567b8a455SSatish Balay     MatMultAdd
326667b8a455SSatish Balay     MatMultTransposeAdd
326767b8a455SSatish Balay     MatView
326867b8a455SSatish Balay .ve
32690bad9183SKris Buschelman 
32700bad9183SKris Buschelman   Level: advanced
32710bad9183SKris Buschelman 
3272db781477SPatrick Sanan .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3273ff585edeSJed Brown @*/
32749371c9d4SSatish Balay PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) {
3275b24ad042SBarry Smith   PetscInt  n;
327682b1193eSBarry Smith   Mat       B;
327790f67b22SBarry Smith   PetscBool flg;
3278fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3279fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3280fdc842d1SBarry Smith   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3281fdc842d1SBarry Smith #endif
328282b1193eSBarry Smith 
328382b1193eSBarry Smith   PetscFunctionBegin;
3284fdc842d1SBarry Smith   dof = PetscAbs(dof);
32859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)A));
3286d72c5c08SBarry Smith 
328726fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
328826fbe8dcSKarl Rupp   else {
32899566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3290c248e2fdSStefano Zampini     /* propagate vec type */
32919566063dSJacob Faibussowitsch     PetscCall(MatSetVecType(B, A->defaultvectype));
32929566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
32939566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
32949566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
32959566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
32969566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
329726fbe8dcSKarl Rupp 
3298cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3299d72c5c08SBarry Smith 
330090f67b22SBarry Smith     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
330190f67b22SBarry Smith     if (flg) {
3302feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3303feb9fe23SJed Brown 
33049566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATSEQMAIJ));
330526fbe8dcSKarl Rupp 
33060298fd71SBarry Smith       B->ops->setup   = NULL;
33074cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33080fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
33094222ddf1SHong Zhang 
3310feb9fe23SJed Brown       b      = (Mat_SeqMAIJ *)B->data;
3311b9b97703SBarry Smith       b->dof = dof;
33124cb9d9b8SBarry Smith       b->AIJ = A;
331326fbe8dcSKarl Rupp 
3314d72c5c08SBarry Smith       if (dof == 2) {
3315cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3316cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3317cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3318cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319bcc973b7SBarry Smith       } else if (dof == 3) {
3320bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3321bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3322bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3323bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324bcc973b7SBarry Smith       } else if (dof == 4) {
3325bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3326bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3327bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3328bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329f9fae5adSBarry Smith       } else if (dof == 5) {
3330f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3331f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3332f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3333f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33346cd98798SBarry Smith       } else if (dof == 6) {
33356cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33366cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33376cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33386cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339ed8eea03SBarry Smith       } else if (dof == 7) {
3340ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3341ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3342ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3343ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
334466ed3db0SBarry Smith       } else if (dof == 8) {
334566ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
334666ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
334766ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
334866ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33492b692628SMatthew Knepley       } else if (dof == 9) {
33502b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33512b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33522b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33532b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354b9cda22cSBarry Smith       } else if (dof == 10) {
3355b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3356b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3357b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3358b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359dbdd7285SBarry Smith       } else if (dof == 11) {
3360dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3361dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3362dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3363dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33642f7816d4SBarry Smith       } else if (dof == 16) {
33652f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33662f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33672f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33682f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369ed1c418cSBarry Smith       } else if (dof == 18) {
3370ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3371ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3372ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3373ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
337482b1193eSBarry Smith       } else {
33756861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33766861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33776861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33786861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
337982b1193eSBarry Smith       }
3380fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
33819566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3382fdc842d1SBarry Smith #endif
33839566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
33849566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3385f4a53059SBarry Smith     } else {
3386f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3387feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3388f4a53059SBarry Smith       IS           from, to;
3389f4a53059SBarry Smith       Vec          gvec;
3390f4a53059SBarry Smith 
33919566063dSJacob Faibussowitsch       PetscCall(MatSetType(B, MATMPIMAIJ));
339226fbe8dcSKarl Rupp 
33930298fd71SBarry Smith       B->ops->setup   = NULL;
3394d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
33950fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
339626fbe8dcSKarl Rupp 
3397b9b97703SBarry Smith       b      = (Mat_MPIMAIJ *)B->data;
3398b9b97703SBarry Smith       b->dof = dof;
3399b9b97703SBarry Smith       b->A   = A;
340026fbe8dcSKarl Rupp 
34019566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
34029566063dSJacob Faibussowitsch       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
34034cb9d9b8SBarry Smith 
34049566063dSJacob Faibussowitsch       PetscCall(VecGetSize(mpiaij->lvec, &n));
34059566063dSJacob Faibussowitsch       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
34069566063dSJacob Faibussowitsch       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
34079566063dSJacob Faibussowitsch       PetscCall(VecSetBlockSize(b->w, dof));
34089566063dSJacob Faibussowitsch       PetscCall(VecSetType(b->w, VECSEQ));
3409f4a53059SBarry Smith 
3410f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
34119566063dSJacob Faibussowitsch       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
34129566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3413f4a53059SBarry Smith 
3414f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
34159566063dSJacob Faibussowitsch       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3416f4a53059SBarry Smith 
3417f4a53059SBarry Smith       /* generate the scatter context */
34189566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3419f4a53059SBarry Smith 
34209566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&from));
34219566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&to));
34229566063dSJacob Faibussowitsch       PetscCall(VecDestroy(&gvec));
3423f4a53059SBarry Smith 
3424f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34254cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34264cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34274cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
342826fbe8dcSKarl Rupp 
3429fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
34309566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3431fdc842d1SBarry Smith #endif
34329566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
34339566063dSJacob Faibussowitsch       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3434f4a53059SBarry Smith     }
34357dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3436ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
34379566063dSJacob Faibussowitsch     PetscCall(MatSetUp(B));
3438fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3439fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3440fdc842d1SBarry Smith     {
3441fdc842d1SBarry Smith       PetscBool flg;
3442fdc842d1SBarry Smith       if (convert) {
34439566063dSJacob Faibussowitsch         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
3444*48a46eb9SPierre Jolivet         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3445fdc842d1SBarry Smith       }
3446fdc842d1SBarry Smith     }
3447fdc842d1SBarry Smith #endif
3448cd3bbe55SBarry Smith     *maij = B;
3449d72c5c08SBarry Smith   }
345082b1193eSBarry Smith   PetscFunctionReturn(0);
345182b1193eSBarry Smith }
3452