xref: /petsc/src/mat/impls/maij/maij.c (revision 37d05b0256c1e9ba4bc423c4eccb3df226931ef0)
1 
2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/utils/freespace.h>
4 
5 /*@
6    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7 
8    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9 
10    Input Parameter:
11 .  A - the `MATMAIJ` matrix
12 
13    Output Parameter:
14 .  B - the `MATAIJ` matrix
15 
16    Level: advanced
17 
18    Note:
19     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20 
21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22 @*/
23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24 {
25   PetscBool ismpimaij, isseqmaij;
26 
27   PetscFunctionBegin;
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
29   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
30   if (ismpimaij) {
31     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32 
33     *B = b->A;
34   } else if (isseqmaij) {
35     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36 
37     *B = b->AIJ;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 /*@
45    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46 
47    Logically Collective
48 
49    Input Parameters:
50 +  A - the `MATMAIJ` matrix
51 -  dof - the block size for the new matrix
52 
53    Output Parameter:
54 .  B - the new `MATMAIJ` matrix
55 
56    Level: advanced
57 
58 .seealso: `MATMAIJ`, `MatCreateMAIJ()`
59 @*/
60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
61 {
62   Mat Aij = NULL;
63 
64   PetscFunctionBegin;
65   PetscValidLogicalCollectiveInt(A, dof, 2);
66   PetscCall(MatMAIJGetAIJ(A, &Aij));
67   PetscCall(MatCreateMAIJ(Aij, dof, B));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
72 {
73   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74 
75   PetscFunctionBegin;
76   PetscCall(MatDestroy(&b->AIJ));
77   PetscCall(PetscFree(A->data));
78   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatSetUp_MAIJ(Mat A)
85 {
86   PetscFunctionBegin;
87   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
88 }
89 
90 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
91 {
92   Mat B;
93 
94   PetscFunctionBegin;
95   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
96   PetscCall(MatView(B, viewer));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
107   PetscCall(MatView(B, viewer));
108   PetscCall(MatDestroy(&B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113 {
114   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatDestroy(&b->AIJ));
118   PetscCall(MatDestroy(&b->OAIJ));
119   PetscCall(MatDestroy(&b->A));
120   PetscCall(VecScatterDestroy(&b->ctx));
121   PetscCall(VecDestroy(&b->w));
122   PetscCall(PetscFree(A->data));
123   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
124   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
125   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
126   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 /*MC
131   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
132   multicomponent problems, interpolating or restricting each component the same way independently.
133   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134 
135   Operations provided:
136 .vb
137     MatMult()
138     MatMultTranspose()
139     MatMultAdd()
140     MatMultTransposeAdd()
141 .ve
142 
143   Level: advanced
144 
145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
146 M*/
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149 {
150   Mat_MPIMAIJ *b;
151   PetscMPIInt  size;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscNew(&b));
155   A->data = (void *)b;
156 
157   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158 
159   A->ops->setup = MatSetUp_MAIJ;
160 
161   b->AIJ  = NULL;
162   b->dof  = 0;
163   b->OAIJ = NULL;
164   b->ctx  = NULL;
165   b->w    = NULL;
166   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
167   if (size == 1) {
168     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
169   } else {
170     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
171   }
172   A->preallocated = PETSC_TRUE;
173   A->assembled    = PETSC_TRUE;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /* --------------------------------------------------------------------------------------*/
178 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
179 {
180   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
181   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
182   const PetscScalar *x, *v;
183   PetscScalar       *y, sum1, sum2;
184   PetscInt           nonzerorow = 0, n, i, jrow, j;
185   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
186 
187   PetscFunctionBegin;
188   PetscCall(VecGetArrayRead(xx, &x));
189   PetscCall(VecGetArray(yy, &y));
190   idx = a->j;
191   v   = a->a;
192   ii  = a->i;
193 
194   for (i = 0; i < m; i++) {
195     jrow = ii[i];
196     n    = ii[i + 1] - jrow;
197     sum1 = 0.0;
198     sum2 = 0.0;
199 
200     nonzerorow += (n > 0);
201     for (j = 0; j < n; j++) {
202       sum1 += v[jrow] * x[2 * idx[jrow]];
203       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
204       jrow++;
205     }
206     y[2 * i]     = sum1;
207     y[2 * i + 1] = sum2;
208   }
209 
210   PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow));
211   PetscCall(VecRestoreArrayRead(xx, &x));
212   PetscCall(VecRestoreArray(yy, &y));
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
217 {
218   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
219   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
220   const PetscScalar *x, *v;
221   PetscScalar       *y, alpha1, alpha2;
222   PetscInt           n, i;
223   const PetscInt     m = b->AIJ->rmap->n, *idx;
224 
225   PetscFunctionBegin;
226   PetscCall(VecSet(yy, 0.0));
227   PetscCall(VecGetArrayRead(xx, &x));
228   PetscCall(VecGetArray(yy, &y));
229 
230   for (i = 0; i < m; i++) {
231     idx    = a->j + a->i[i];
232     v      = a->a + a->i[i];
233     n      = a->i[i + 1] - a->i[i];
234     alpha1 = x[2 * i];
235     alpha2 = x[2 * i + 1];
236     while (n-- > 0) {
237       y[2 * (*idx)] += alpha1 * (*v);
238       y[2 * (*idx) + 1] += alpha2 * (*v);
239       idx++;
240       v++;
241     }
242   }
243   PetscCall(PetscLogFlops(4.0 * a->nz));
244   PetscCall(VecRestoreArrayRead(xx, &x));
245   PetscCall(VecRestoreArray(yy, &y));
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
250 {
251   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
252   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
253   const PetscScalar *x, *v;
254   PetscScalar       *y, sum1, sum2;
255   PetscInt           n, i, jrow, j;
256   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
257 
258   PetscFunctionBegin;
259   if (yy != zz) PetscCall(VecCopy(yy, zz));
260   PetscCall(VecGetArrayRead(xx, &x));
261   PetscCall(VecGetArray(zz, &y));
262   idx = a->j;
263   v   = a->a;
264   ii  = a->i;
265 
266   for (i = 0; i < m; i++) {
267     jrow = ii[i];
268     n    = ii[i + 1] - jrow;
269     sum1 = 0.0;
270     sum2 = 0.0;
271     for (j = 0; j < n; j++) {
272       sum1 += v[jrow] * x[2 * idx[jrow]];
273       sum2 += v[jrow] * x[2 * idx[jrow] + 1];
274       jrow++;
275     }
276     y[2 * i] += sum1;
277     y[2 * i + 1] += sum2;
278   }
279 
280   PetscCall(PetscLogFlops(4.0 * a->nz));
281   PetscCall(VecRestoreArrayRead(xx, &x));
282   PetscCall(VecRestoreArray(zz, &y));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
286 {
287   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
288   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
289   const PetscScalar *x, *v;
290   PetscScalar       *y, alpha1, alpha2;
291   PetscInt           n, i;
292   const PetscInt     m = b->AIJ->rmap->n, *idx;
293 
294   PetscFunctionBegin;
295   if (yy != zz) PetscCall(VecCopy(yy, zz));
296   PetscCall(VecGetArrayRead(xx, &x));
297   PetscCall(VecGetArray(zz, &y));
298 
299   for (i = 0; i < m; i++) {
300     idx    = a->j + a->i[i];
301     v      = a->a + a->i[i];
302     n      = a->i[i + 1] - a->i[i];
303     alpha1 = x[2 * i];
304     alpha2 = x[2 * i + 1];
305     while (n-- > 0) {
306       y[2 * (*idx)] += alpha1 * (*v);
307       y[2 * (*idx) + 1] += alpha2 * (*v);
308       idx++;
309       v++;
310     }
311   }
312   PetscCall(PetscLogFlops(4.0 * a->nz));
313   PetscCall(VecRestoreArrayRead(xx, &x));
314   PetscCall(VecRestoreArray(zz, &y));
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 /* --------------------------------------------------------------------------------------*/
318 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
319 {
320   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
321   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
322   const PetscScalar *x, *v;
323   PetscScalar       *y, sum1, sum2, sum3;
324   PetscInt           nonzerorow = 0, n, i, jrow, j;
325   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
326 
327   PetscFunctionBegin;
328   PetscCall(VecGetArrayRead(xx, &x));
329   PetscCall(VecGetArray(yy, &y));
330   idx = a->j;
331   v   = a->a;
332   ii  = a->i;
333 
334   for (i = 0; i < m; i++) {
335     jrow = ii[i];
336     n    = ii[i + 1] - jrow;
337     sum1 = 0.0;
338     sum2 = 0.0;
339     sum3 = 0.0;
340 
341     nonzerorow += (n > 0);
342     for (j = 0; j < n; j++) {
343       sum1 += v[jrow] * x[3 * idx[jrow]];
344       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
345       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
346       jrow++;
347     }
348     y[3 * i]     = sum1;
349     y[3 * i + 1] = sum2;
350     y[3 * i + 2] = sum3;
351   }
352 
353   PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow));
354   PetscCall(VecRestoreArrayRead(xx, &x));
355   PetscCall(VecRestoreArray(yy, &y));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
360 {
361   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
362   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
363   const PetscScalar *x, *v;
364   PetscScalar       *y, alpha1, alpha2, alpha3;
365   PetscInt           n, i;
366   const PetscInt     m = b->AIJ->rmap->n, *idx;
367 
368   PetscFunctionBegin;
369   PetscCall(VecSet(yy, 0.0));
370   PetscCall(VecGetArrayRead(xx, &x));
371   PetscCall(VecGetArray(yy, &y));
372 
373   for (i = 0; i < m; i++) {
374     idx    = a->j + a->i[i];
375     v      = a->a + a->i[i];
376     n      = a->i[i + 1] - a->i[i];
377     alpha1 = x[3 * i];
378     alpha2 = x[3 * i + 1];
379     alpha3 = x[3 * i + 2];
380     while (n-- > 0) {
381       y[3 * (*idx)] += alpha1 * (*v);
382       y[3 * (*idx) + 1] += alpha2 * (*v);
383       y[3 * (*idx) + 2] += alpha3 * (*v);
384       idx++;
385       v++;
386     }
387   }
388   PetscCall(PetscLogFlops(6.0 * a->nz));
389   PetscCall(VecRestoreArrayRead(xx, &x));
390   PetscCall(VecRestoreArray(yy, &y));
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
395 {
396   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
397   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
398   const PetscScalar *x, *v;
399   PetscScalar       *y, sum1, sum2, sum3;
400   PetscInt           n, i, jrow, j;
401   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
402 
403   PetscFunctionBegin;
404   if (yy != zz) PetscCall(VecCopy(yy, zz));
405   PetscCall(VecGetArrayRead(xx, &x));
406   PetscCall(VecGetArray(zz, &y));
407   idx = a->j;
408   v   = a->a;
409   ii  = a->i;
410 
411   for (i = 0; i < m; i++) {
412     jrow = ii[i];
413     n    = ii[i + 1] - jrow;
414     sum1 = 0.0;
415     sum2 = 0.0;
416     sum3 = 0.0;
417     for (j = 0; j < n; j++) {
418       sum1 += v[jrow] * x[3 * idx[jrow]];
419       sum2 += v[jrow] * x[3 * idx[jrow] + 1];
420       sum3 += v[jrow] * x[3 * idx[jrow] + 2];
421       jrow++;
422     }
423     y[3 * i] += sum1;
424     y[3 * i + 1] += sum2;
425     y[3 * i + 2] += sum3;
426   }
427 
428   PetscCall(PetscLogFlops(6.0 * a->nz));
429   PetscCall(VecRestoreArrayRead(xx, &x));
430   PetscCall(VecRestoreArray(zz, &y));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
434 {
435   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
436   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
437   const PetscScalar *x, *v;
438   PetscScalar       *y, alpha1, alpha2, alpha3;
439   PetscInt           n, i;
440   const PetscInt     m = b->AIJ->rmap->n, *idx;
441 
442   PetscFunctionBegin;
443   if (yy != zz) PetscCall(VecCopy(yy, zz));
444   PetscCall(VecGetArrayRead(xx, &x));
445   PetscCall(VecGetArray(zz, &y));
446   for (i = 0; i < m; i++) {
447     idx    = a->j + a->i[i];
448     v      = a->a + a->i[i];
449     n      = a->i[i + 1] - a->i[i];
450     alpha1 = x[3 * i];
451     alpha2 = x[3 * i + 1];
452     alpha3 = x[3 * i + 2];
453     while (n-- > 0) {
454       y[3 * (*idx)] += alpha1 * (*v);
455       y[3 * (*idx) + 1] += alpha2 * (*v);
456       y[3 * (*idx) + 2] += alpha3 * (*v);
457       idx++;
458       v++;
459     }
460   }
461   PetscCall(PetscLogFlops(6.0 * a->nz));
462   PetscCall(VecRestoreArrayRead(xx, &x));
463   PetscCall(VecRestoreArray(zz, &y));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 /* ------------------------------------------------------------------------------*/
468 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
469 {
470   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
471   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
472   const PetscScalar *x, *v;
473   PetscScalar       *y, sum1, sum2, sum3, sum4;
474   PetscInt           nonzerorow = 0, n, i, jrow, j;
475   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
476 
477   PetscFunctionBegin;
478   PetscCall(VecGetArrayRead(xx, &x));
479   PetscCall(VecGetArray(yy, &y));
480   idx = a->j;
481   v   = a->a;
482   ii  = a->i;
483 
484   for (i = 0; i < m; i++) {
485     jrow = ii[i];
486     n    = ii[i + 1] - jrow;
487     sum1 = 0.0;
488     sum2 = 0.0;
489     sum3 = 0.0;
490     sum4 = 0.0;
491     nonzerorow += (n > 0);
492     for (j = 0; j < n; j++) {
493       sum1 += v[jrow] * x[4 * idx[jrow]];
494       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
495       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
496       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
497       jrow++;
498     }
499     y[4 * i]     = sum1;
500     y[4 * i + 1] = sum2;
501     y[4 * i + 2] = sum3;
502     y[4 * i + 3] = sum4;
503   }
504 
505   PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow));
506   PetscCall(VecRestoreArrayRead(xx, &x));
507   PetscCall(VecRestoreArray(yy, &y));
508   PetscFunctionReturn(PETSC_SUCCESS);
509 }
510 
511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
512 {
513   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
514   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
515   const PetscScalar *x, *v;
516   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
517   PetscInt           n, i;
518   const PetscInt     m = b->AIJ->rmap->n, *idx;
519 
520   PetscFunctionBegin;
521   PetscCall(VecSet(yy, 0.0));
522   PetscCall(VecGetArrayRead(xx, &x));
523   PetscCall(VecGetArray(yy, &y));
524   for (i = 0; i < m; i++) {
525     idx    = a->j + a->i[i];
526     v      = a->a + a->i[i];
527     n      = a->i[i + 1] - a->i[i];
528     alpha1 = x[4 * i];
529     alpha2 = x[4 * i + 1];
530     alpha3 = x[4 * i + 2];
531     alpha4 = x[4 * i + 3];
532     while (n-- > 0) {
533       y[4 * (*idx)] += alpha1 * (*v);
534       y[4 * (*idx) + 1] += alpha2 * (*v);
535       y[4 * (*idx) + 2] += alpha3 * (*v);
536       y[4 * (*idx) + 3] += alpha4 * (*v);
537       idx++;
538       v++;
539     }
540   }
541   PetscCall(PetscLogFlops(8.0 * a->nz));
542   PetscCall(VecRestoreArrayRead(xx, &x));
543   PetscCall(VecRestoreArray(yy, &y));
544   PetscFunctionReturn(PETSC_SUCCESS);
545 }
546 
547 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
548 {
549   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
550   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
551   const PetscScalar *x, *v;
552   PetscScalar       *y, sum1, sum2, sum3, sum4;
553   PetscInt           n, i, jrow, j;
554   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
555 
556   PetscFunctionBegin;
557   if (yy != zz) PetscCall(VecCopy(yy, zz));
558   PetscCall(VecGetArrayRead(xx, &x));
559   PetscCall(VecGetArray(zz, &y));
560   idx = a->j;
561   v   = a->a;
562   ii  = a->i;
563 
564   for (i = 0; i < m; i++) {
565     jrow = ii[i];
566     n    = ii[i + 1] - jrow;
567     sum1 = 0.0;
568     sum2 = 0.0;
569     sum3 = 0.0;
570     sum4 = 0.0;
571     for (j = 0; j < n; j++) {
572       sum1 += v[jrow] * x[4 * idx[jrow]];
573       sum2 += v[jrow] * x[4 * idx[jrow] + 1];
574       sum3 += v[jrow] * x[4 * idx[jrow] + 2];
575       sum4 += v[jrow] * x[4 * idx[jrow] + 3];
576       jrow++;
577     }
578     y[4 * i] += sum1;
579     y[4 * i + 1] += sum2;
580     y[4 * i + 2] += sum3;
581     y[4 * i + 3] += sum4;
582   }
583 
584   PetscCall(PetscLogFlops(8.0 * a->nz));
585   PetscCall(VecRestoreArrayRead(xx, &x));
586   PetscCall(VecRestoreArray(zz, &y));
587   PetscFunctionReturn(PETSC_SUCCESS);
588 }
589 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
590 {
591   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
592   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
593   const PetscScalar *x, *v;
594   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4;
595   PetscInt           n, i;
596   const PetscInt     m = b->AIJ->rmap->n, *idx;
597 
598   PetscFunctionBegin;
599   if (yy != zz) PetscCall(VecCopy(yy, zz));
600   PetscCall(VecGetArrayRead(xx, &x));
601   PetscCall(VecGetArray(zz, &y));
602 
603   for (i = 0; i < m; i++) {
604     idx    = a->j + a->i[i];
605     v      = a->a + a->i[i];
606     n      = a->i[i + 1] - a->i[i];
607     alpha1 = x[4 * i];
608     alpha2 = x[4 * i + 1];
609     alpha3 = x[4 * i + 2];
610     alpha4 = x[4 * i + 3];
611     while (n-- > 0) {
612       y[4 * (*idx)] += alpha1 * (*v);
613       y[4 * (*idx) + 1] += alpha2 * (*v);
614       y[4 * (*idx) + 2] += alpha3 * (*v);
615       y[4 * (*idx) + 3] += alpha4 * (*v);
616       idx++;
617       v++;
618     }
619   }
620   PetscCall(PetscLogFlops(8.0 * a->nz));
621   PetscCall(VecRestoreArrayRead(xx, &x));
622   PetscCall(VecRestoreArray(zz, &y));
623   PetscFunctionReturn(PETSC_SUCCESS);
624 }
625 /* ------------------------------------------------------------------------------*/
626 
627 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
628 {
629   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
630   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
631   const PetscScalar *x, *v;
632   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
633   PetscInt           nonzerorow = 0, n, i, jrow, j;
634   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
635 
636   PetscFunctionBegin;
637   PetscCall(VecGetArrayRead(xx, &x));
638   PetscCall(VecGetArray(yy, &y));
639   idx = a->j;
640   v   = a->a;
641   ii  = a->i;
642 
643   for (i = 0; i < m; i++) {
644     jrow = ii[i];
645     n    = ii[i + 1] - jrow;
646     sum1 = 0.0;
647     sum2 = 0.0;
648     sum3 = 0.0;
649     sum4 = 0.0;
650     sum5 = 0.0;
651 
652     nonzerorow += (n > 0);
653     for (j = 0; j < n; j++) {
654       sum1 += v[jrow] * x[5 * idx[jrow]];
655       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
656       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
657       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
658       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
659       jrow++;
660     }
661     y[5 * i]     = sum1;
662     y[5 * i + 1] = sum2;
663     y[5 * i + 2] = sum3;
664     y[5 * i + 3] = sum4;
665     y[5 * i + 4] = sum5;
666   }
667 
668   PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow));
669   PetscCall(VecRestoreArrayRead(xx, &x));
670   PetscCall(VecRestoreArray(yy, &y));
671   PetscFunctionReturn(PETSC_SUCCESS);
672 }
673 
674 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
675 {
676   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
677   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
678   const PetscScalar *x, *v;
679   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
680   PetscInt           n, i;
681   const PetscInt     m = b->AIJ->rmap->n, *idx;
682 
683   PetscFunctionBegin;
684   PetscCall(VecSet(yy, 0.0));
685   PetscCall(VecGetArrayRead(xx, &x));
686   PetscCall(VecGetArray(yy, &y));
687 
688   for (i = 0; i < m; i++) {
689     idx    = a->j + a->i[i];
690     v      = a->a + a->i[i];
691     n      = a->i[i + 1] - a->i[i];
692     alpha1 = x[5 * i];
693     alpha2 = x[5 * i + 1];
694     alpha3 = x[5 * i + 2];
695     alpha4 = x[5 * i + 3];
696     alpha5 = x[5 * i + 4];
697     while (n-- > 0) {
698       y[5 * (*idx)] += alpha1 * (*v);
699       y[5 * (*idx) + 1] += alpha2 * (*v);
700       y[5 * (*idx) + 2] += alpha3 * (*v);
701       y[5 * (*idx) + 3] += alpha4 * (*v);
702       y[5 * (*idx) + 4] += alpha5 * (*v);
703       idx++;
704       v++;
705     }
706   }
707   PetscCall(PetscLogFlops(10.0 * a->nz));
708   PetscCall(VecRestoreArrayRead(xx, &x));
709   PetscCall(VecRestoreArray(yy, &y));
710   PetscFunctionReturn(PETSC_SUCCESS);
711 }
712 
713 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
714 {
715   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
716   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
717   const PetscScalar *x, *v;
718   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5;
719   PetscInt           n, i, jrow, j;
720   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
721 
722   PetscFunctionBegin;
723   if (yy != zz) PetscCall(VecCopy(yy, zz));
724   PetscCall(VecGetArrayRead(xx, &x));
725   PetscCall(VecGetArray(zz, &y));
726   idx = a->j;
727   v   = a->a;
728   ii  = a->i;
729 
730   for (i = 0; i < m; i++) {
731     jrow = ii[i];
732     n    = ii[i + 1] - jrow;
733     sum1 = 0.0;
734     sum2 = 0.0;
735     sum3 = 0.0;
736     sum4 = 0.0;
737     sum5 = 0.0;
738     for (j = 0; j < n; j++) {
739       sum1 += v[jrow] * x[5 * idx[jrow]];
740       sum2 += v[jrow] * x[5 * idx[jrow] + 1];
741       sum3 += v[jrow] * x[5 * idx[jrow] + 2];
742       sum4 += v[jrow] * x[5 * idx[jrow] + 3];
743       sum5 += v[jrow] * x[5 * idx[jrow] + 4];
744       jrow++;
745     }
746     y[5 * i] += sum1;
747     y[5 * i + 1] += sum2;
748     y[5 * i + 2] += sum3;
749     y[5 * i + 3] += sum4;
750     y[5 * i + 4] += sum5;
751   }
752 
753   PetscCall(PetscLogFlops(10.0 * a->nz));
754   PetscCall(VecRestoreArrayRead(xx, &x));
755   PetscCall(VecRestoreArray(zz, &y));
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
759 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
760 {
761   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
762   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
763   const PetscScalar *x, *v;
764   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5;
765   PetscInt           n, i;
766   const PetscInt     m = b->AIJ->rmap->n, *idx;
767 
768   PetscFunctionBegin;
769   if (yy != zz) PetscCall(VecCopy(yy, zz));
770   PetscCall(VecGetArrayRead(xx, &x));
771   PetscCall(VecGetArray(zz, &y));
772 
773   for (i = 0; i < m; i++) {
774     idx    = a->j + a->i[i];
775     v      = a->a + a->i[i];
776     n      = a->i[i + 1] - a->i[i];
777     alpha1 = x[5 * i];
778     alpha2 = x[5 * i + 1];
779     alpha3 = x[5 * i + 2];
780     alpha4 = x[5 * i + 3];
781     alpha5 = x[5 * i + 4];
782     while (n-- > 0) {
783       y[5 * (*idx)] += alpha1 * (*v);
784       y[5 * (*idx) + 1] += alpha2 * (*v);
785       y[5 * (*idx) + 2] += alpha3 * (*v);
786       y[5 * (*idx) + 3] += alpha4 * (*v);
787       y[5 * (*idx) + 4] += alpha5 * (*v);
788       idx++;
789       v++;
790     }
791   }
792   PetscCall(PetscLogFlops(10.0 * a->nz));
793   PetscCall(VecRestoreArrayRead(xx, &x));
794   PetscCall(VecRestoreArray(zz, &y));
795   PetscFunctionReturn(PETSC_SUCCESS);
796 }
797 
798 /* ------------------------------------------------------------------------------*/
799 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
800 {
801   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
802   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
803   const PetscScalar *x, *v;
804   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
805   PetscInt           nonzerorow = 0, n, i, jrow, j;
806   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
807 
808   PetscFunctionBegin;
809   PetscCall(VecGetArrayRead(xx, &x));
810   PetscCall(VecGetArray(yy, &y));
811   idx = a->j;
812   v   = a->a;
813   ii  = a->i;
814 
815   for (i = 0; i < m; i++) {
816     jrow = ii[i];
817     n    = ii[i + 1] - jrow;
818     sum1 = 0.0;
819     sum2 = 0.0;
820     sum3 = 0.0;
821     sum4 = 0.0;
822     sum5 = 0.0;
823     sum6 = 0.0;
824 
825     nonzerorow += (n > 0);
826     for (j = 0; j < n; j++) {
827       sum1 += v[jrow] * x[6 * idx[jrow]];
828       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
829       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
830       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
831       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
832       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
833       jrow++;
834     }
835     y[6 * i]     = sum1;
836     y[6 * i + 1] = sum2;
837     y[6 * i + 2] = sum3;
838     y[6 * i + 3] = sum4;
839     y[6 * i + 4] = sum5;
840     y[6 * i + 5] = sum6;
841   }
842 
843   PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow));
844   PetscCall(VecRestoreArrayRead(xx, &x));
845   PetscCall(VecRestoreArray(yy, &y));
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
849 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
850 {
851   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
852   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
853   const PetscScalar *x, *v;
854   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
855   PetscInt           n, i;
856   const PetscInt     m = b->AIJ->rmap->n, *idx;
857 
858   PetscFunctionBegin;
859   PetscCall(VecSet(yy, 0.0));
860   PetscCall(VecGetArrayRead(xx, &x));
861   PetscCall(VecGetArray(yy, &y));
862 
863   for (i = 0; i < m; i++) {
864     idx    = a->j + a->i[i];
865     v      = a->a + a->i[i];
866     n      = a->i[i + 1] - a->i[i];
867     alpha1 = x[6 * i];
868     alpha2 = x[6 * i + 1];
869     alpha3 = x[6 * i + 2];
870     alpha4 = x[6 * i + 3];
871     alpha5 = x[6 * i + 4];
872     alpha6 = x[6 * i + 5];
873     while (n-- > 0) {
874       y[6 * (*idx)] += alpha1 * (*v);
875       y[6 * (*idx) + 1] += alpha2 * (*v);
876       y[6 * (*idx) + 2] += alpha3 * (*v);
877       y[6 * (*idx) + 3] += alpha4 * (*v);
878       y[6 * (*idx) + 4] += alpha5 * (*v);
879       y[6 * (*idx) + 5] += alpha6 * (*v);
880       idx++;
881       v++;
882     }
883   }
884   PetscCall(PetscLogFlops(12.0 * a->nz));
885   PetscCall(VecRestoreArrayRead(xx, &x));
886   PetscCall(VecRestoreArray(yy, &y));
887   PetscFunctionReturn(PETSC_SUCCESS);
888 }
889 
890 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
891 {
892   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
893   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
894   const PetscScalar *x, *v;
895   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6;
896   PetscInt           n, i, jrow, j;
897   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
898 
899   PetscFunctionBegin;
900   if (yy != zz) PetscCall(VecCopy(yy, zz));
901   PetscCall(VecGetArrayRead(xx, &x));
902   PetscCall(VecGetArray(zz, &y));
903   idx = a->j;
904   v   = a->a;
905   ii  = a->i;
906 
907   for (i = 0; i < m; i++) {
908     jrow = ii[i];
909     n    = ii[i + 1] - jrow;
910     sum1 = 0.0;
911     sum2 = 0.0;
912     sum3 = 0.0;
913     sum4 = 0.0;
914     sum5 = 0.0;
915     sum6 = 0.0;
916     for (j = 0; j < n; j++) {
917       sum1 += v[jrow] * x[6 * idx[jrow]];
918       sum2 += v[jrow] * x[6 * idx[jrow] + 1];
919       sum3 += v[jrow] * x[6 * idx[jrow] + 2];
920       sum4 += v[jrow] * x[6 * idx[jrow] + 3];
921       sum5 += v[jrow] * x[6 * idx[jrow] + 4];
922       sum6 += v[jrow] * x[6 * idx[jrow] + 5];
923       jrow++;
924     }
925     y[6 * i] += sum1;
926     y[6 * i + 1] += sum2;
927     y[6 * i + 2] += sum3;
928     y[6 * i + 3] += sum4;
929     y[6 * i + 4] += sum5;
930     y[6 * i + 5] += sum6;
931   }
932 
933   PetscCall(PetscLogFlops(12.0 * a->nz));
934   PetscCall(VecRestoreArrayRead(xx, &x));
935   PetscCall(VecRestoreArray(zz, &y));
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
940 {
941   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
942   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
943   const PetscScalar *x, *v;
944   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
945   PetscInt           n, i;
946   const PetscInt     m = b->AIJ->rmap->n, *idx;
947 
948   PetscFunctionBegin;
949   if (yy != zz) PetscCall(VecCopy(yy, zz));
950   PetscCall(VecGetArrayRead(xx, &x));
951   PetscCall(VecGetArray(zz, &y));
952 
953   for (i = 0; i < m; i++) {
954     idx    = a->j + a->i[i];
955     v      = a->a + a->i[i];
956     n      = a->i[i + 1] - a->i[i];
957     alpha1 = x[6 * i];
958     alpha2 = x[6 * i + 1];
959     alpha3 = x[6 * i + 2];
960     alpha4 = x[6 * i + 3];
961     alpha5 = x[6 * i + 4];
962     alpha6 = x[6 * i + 5];
963     while (n-- > 0) {
964       y[6 * (*idx)] += alpha1 * (*v);
965       y[6 * (*idx) + 1] += alpha2 * (*v);
966       y[6 * (*idx) + 2] += alpha3 * (*v);
967       y[6 * (*idx) + 3] += alpha4 * (*v);
968       y[6 * (*idx) + 4] += alpha5 * (*v);
969       y[6 * (*idx) + 5] += alpha6 * (*v);
970       idx++;
971       v++;
972     }
973   }
974   PetscCall(PetscLogFlops(12.0 * a->nz));
975   PetscCall(VecRestoreArrayRead(xx, &x));
976   PetscCall(VecRestoreArray(zz, &y));
977   PetscFunctionReturn(PETSC_SUCCESS);
978 }
979 
980 /* ------------------------------------------------------------------------------*/
981 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
982 {
983   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
984   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
985   const PetscScalar *x, *v;
986   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
987   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
988   PetscInt           nonzerorow = 0, n, i, jrow, j;
989 
990   PetscFunctionBegin;
991   PetscCall(VecGetArrayRead(xx, &x));
992   PetscCall(VecGetArray(yy, &y));
993   idx = a->j;
994   v   = a->a;
995   ii  = a->i;
996 
997   for (i = 0; i < m; i++) {
998     jrow = ii[i];
999     n    = ii[i + 1] - jrow;
1000     sum1 = 0.0;
1001     sum2 = 0.0;
1002     sum3 = 0.0;
1003     sum4 = 0.0;
1004     sum5 = 0.0;
1005     sum6 = 0.0;
1006     sum7 = 0.0;
1007 
1008     nonzerorow += (n > 0);
1009     for (j = 0; j < n; j++) {
1010       sum1 += v[jrow] * x[7 * idx[jrow]];
1011       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1012       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1013       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1014       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1015       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1016       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1017       jrow++;
1018     }
1019     y[7 * i]     = sum1;
1020     y[7 * i + 1] = sum2;
1021     y[7 * i + 2] = sum3;
1022     y[7 * i + 3] = sum4;
1023     y[7 * i + 4] = sum5;
1024     y[7 * i + 5] = sum6;
1025     y[7 * i + 6] = sum7;
1026   }
1027 
1028   PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow));
1029   PetscCall(VecRestoreArrayRead(xx, &x));
1030   PetscCall(VecRestoreArray(yy, &y));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
1035 {
1036   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1037   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1038   const PetscScalar *x, *v;
1039   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1040   const PetscInt     m = b->AIJ->rmap->n, *idx;
1041   PetscInt           n, i;
1042 
1043   PetscFunctionBegin;
1044   PetscCall(VecSet(yy, 0.0));
1045   PetscCall(VecGetArrayRead(xx, &x));
1046   PetscCall(VecGetArray(yy, &y));
1047 
1048   for (i = 0; i < m; i++) {
1049     idx    = a->j + a->i[i];
1050     v      = a->a + a->i[i];
1051     n      = a->i[i + 1] - a->i[i];
1052     alpha1 = x[7 * i];
1053     alpha2 = x[7 * i + 1];
1054     alpha3 = x[7 * i + 2];
1055     alpha4 = x[7 * i + 3];
1056     alpha5 = x[7 * i + 4];
1057     alpha6 = x[7 * i + 5];
1058     alpha7 = x[7 * i + 6];
1059     while (n-- > 0) {
1060       y[7 * (*idx)] += alpha1 * (*v);
1061       y[7 * (*idx) + 1] += alpha2 * (*v);
1062       y[7 * (*idx) + 2] += alpha3 * (*v);
1063       y[7 * (*idx) + 3] += alpha4 * (*v);
1064       y[7 * (*idx) + 4] += alpha5 * (*v);
1065       y[7 * (*idx) + 5] += alpha6 * (*v);
1066       y[7 * (*idx) + 6] += alpha7 * (*v);
1067       idx++;
1068       v++;
1069     }
1070   }
1071   PetscCall(PetscLogFlops(14.0 * a->nz));
1072   PetscCall(VecRestoreArrayRead(xx, &x));
1073   PetscCall(VecRestoreArray(yy, &y));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1080   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1081   const PetscScalar *x, *v;
1082   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1083   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1084   PetscInt           n, i, jrow, j;
1085 
1086   PetscFunctionBegin;
1087   if (yy != zz) PetscCall(VecCopy(yy, zz));
1088   PetscCall(VecGetArrayRead(xx, &x));
1089   PetscCall(VecGetArray(zz, &y));
1090   idx = a->j;
1091   v   = a->a;
1092   ii  = a->i;
1093 
1094   for (i = 0; i < m; i++) {
1095     jrow = ii[i];
1096     n    = ii[i + 1] - jrow;
1097     sum1 = 0.0;
1098     sum2 = 0.0;
1099     sum3 = 0.0;
1100     sum4 = 0.0;
1101     sum5 = 0.0;
1102     sum6 = 0.0;
1103     sum7 = 0.0;
1104     for (j = 0; j < n; j++) {
1105       sum1 += v[jrow] * x[7 * idx[jrow]];
1106       sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1107       sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1108       sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1109       sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1110       sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1111       sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1112       jrow++;
1113     }
1114     y[7 * i] += sum1;
1115     y[7 * i + 1] += sum2;
1116     y[7 * i + 2] += sum3;
1117     y[7 * i + 3] += sum4;
1118     y[7 * i + 4] += sum5;
1119     y[7 * i + 5] += sum6;
1120     y[7 * i + 6] += sum7;
1121   }
1122 
1123   PetscCall(PetscLogFlops(14.0 * a->nz));
1124   PetscCall(VecRestoreArrayRead(xx, &x));
1125   PetscCall(VecRestoreArray(zz, &y));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1130 {
1131   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1132   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1133   const PetscScalar *x, *v;
1134   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1135   const PetscInt     m = b->AIJ->rmap->n, *idx;
1136   PetscInt           n, i;
1137 
1138   PetscFunctionBegin;
1139   if (yy != zz) PetscCall(VecCopy(yy, zz));
1140   PetscCall(VecGetArrayRead(xx, &x));
1141   PetscCall(VecGetArray(zz, &y));
1142   for (i = 0; i < m; i++) {
1143     idx    = a->j + a->i[i];
1144     v      = a->a + a->i[i];
1145     n      = a->i[i + 1] - a->i[i];
1146     alpha1 = x[7 * i];
1147     alpha2 = x[7 * i + 1];
1148     alpha3 = x[7 * i + 2];
1149     alpha4 = x[7 * i + 3];
1150     alpha5 = x[7 * i + 4];
1151     alpha6 = x[7 * i + 5];
1152     alpha7 = x[7 * i + 6];
1153     while (n-- > 0) {
1154       y[7 * (*idx)] += alpha1 * (*v);
1155       y[7 * (*idx) + 1] += alpha2 * (*v);
1156       y[7 * (*idx) + 2] += alpha3 * (*v);
1157       y[7 * (*idx) + 3] += alpha4 * (*v);
1158       y[7 * (*idx) + 4] += alpha5 * (*v);
1159       y[7 * (*idx) + 5] += alpha6 * (*v);
1160       y[7 * (*idx) + 6] += alpha7 * (*v);
1161       idx++;
1162       v++;
1163     }
1164   }
1165   PetscCall(PetscLogFlops(14.0 * a->nz));
1166   PetscCall(VecRestoreArrayRead(xx, &x));
1167   PetscCall(VecRestoreArray(zz, &y));
1168   PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170 
1171 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1172 {
1173   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1174   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1175   const PetscScalar *x, *v;
1176   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1177   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1178   PetscInt           nonzerorow = 0, n, i, jrow, j;
1179 
1180   PetscFunctionBegin;
1181   PetscCall(VecGetArrayRead(xx, &x));
1182   PetscCall(VecGetArray(yy, &y));
1183   idx = a->j;
1184   v   = a->a;
1185   ii  = a->i;
1186 
1187   for (i = 0; i < m; i++) {
1188     jrow = ii[i];
1189     n    = ii[i + 1] - jrow;
1190     sum1 = 0.0;
1191     sum2 = 0.0;
1192     sum3 = 0.0;
1193     sum4 = 0.0;
1194     sum5 = 0.0;
1195     sum6 = 0.0;
1196     sum7 = 0.0;
1197     sum8 = 0.0;
1198 
1199     nonzerorow += (n > 0);
1200     for (j = 0; j < n; j++) {
1201       sum1 += v[jrow] * x[8 * idx[jrow]];
1202       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1203       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1204       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1205       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1206       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1207       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1208       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1209       jrow++;
1210     }
1211     y[8 * i]     = sum1;
1212     y[8 * i + 1] = sum2;
1213     y[8 * i + 2] = sum3;
1214     y[8 * i + 3] = sum4;
1215     y[8 * i + 4] = sum5;
1216     y[8 * i + 5] = sum6;
1217     y[8 * i + 6] = sum7;
1218     y[8 * i + 7] = sum8;
1219   }
1220 
1221   PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow));
1222   PetscCall(VecRestoreArrayRead(xx, &x));
1223   PetscCall(VecRestoreArray(yy, &y));
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1228 {
1229   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1230   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1231   const PetscScalar *x, *v;
1232   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1233   const PetscInt     m = b->AIJ->rmap->n, *idx;
1234   PetscInt           n, i;
1235 
1236   PetscFunctionBegin;
1237   PetscCall(VecSet(yy, 0.0));
1238   PetscCall(VecGetArrayRead(xx, &x));
1239   PetscCall(VecGetArray(yy, &y));
1240 
1241   for (i = 0; i < m; i++) {
1242     idx    = a->j + a->i[i];
1243     v      = a->a + a->i[i];
1244     n      = a->i[i + 1] - a->i[i];
1245     alpha1 = x[8 * i];
1246     alpha2 = x[8 * i + 1];
1247     alpha3 = x[8 * i + 2];
1248     alpha4 = x[8 * i + 3];
1249     alpha5 = x[8 * i + 4];
1250     alpha6 = x[8 * i + 5];
1251     alpha7 = x[8 * i + 6];
1252     alpha8 = x[8 * i + 7];
1253     while (n-- > 0) {
1254       y[8 * (*idx)] += alpha1 * (*v);
1255       y[8 * (*idx) + 1] += alpha2 * (*v);
1256       y[8 * (*idx) + 2] += alpha3 * (*v);
1257       y[8 * (*idx) + 3] += alpha4 * (*v);
1258       y[8 * (*idx) + 4] += alpha5 * (*v);
1259       y[8 * (*idx) + 5] += alpha6 * (*v);
1260       y[8 * (*idx) + 6] += alpha7 * (*v);
1261       y[8 * (*idx) + 7] += alpha8 * (*v);
1262       idx++;
1263       v++;
1264     }
1265   }
1266   PetscCall(PetscLogFlops(16.0 * a->nz));
1267   PetscCall(VecRestoreArrayRead(xx, &x));
1268   PetscCall(VecRestoreArray(yy, &y));
1269   PetscFunctionReturn(PETSC_SUCCESS);
1270 }
1271 
1272 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1273 {
1274   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1275   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1276   const PetscScalar *x, *v;
1277   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1278   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1279   PetscInt           n, i, jrow, j;
1280 
1281   PetscFunctionBegin;
1282   if (yy != zz) PetscCall(VecCopy(yy, zz));
1283   PetscCall(VecGetArrayRead(xx, &x));
1284   PetscCall(VecGetArray(zz, &y));
1285   idx = a->j;
1286   v   = a->a;
1287   ii  = a->i;
1288 
1289   for (i = 0; i < m; i++) {
1290     jrow = ii[i];
1291     n    = ii[i + 1] - jrow;
1292     sum1 = 0.0;
1293     sum2 = 0.0;
1294     sum3 = 0.0;
1295     sum4 = 0.0;
1296     sum5 = 0.0;
1297     sum6 = 0.0;
1298     sum7 = 0.0;
1299     sum8 = 0.0;
1300     for (j = 0; j < n; j++) {
1301       sum1 += v[jrow] * x[8 * idx[jrow]];
1302       sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1303       sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1304       sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1305       sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1306       sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1307       sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1308       sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1309       jrow++;
1310     }
1311     y[8 * i] += sum1;
1312     y[8 * i + 1] += sum2;
1313     y[8 * i + 2] += sum3;
1314     y[8 * i + 3] += sum4;
1315     y[8 * i + 4] += sum5;
1316     y[8 * i + 5] += sum6;
1317     y[8 * i + 6] += sum7;
1318     y[8 * i + 7] += sum8;
1319   }
1320 
1321   PetscCall(PetscLogFlops(16.0 * a->nz));
1322   PetscCall(VecRestoreArrayRead(xx, &x));
1323   PetscCall(VecRestoreArray(zz, &y));
1324   PetscFunctionReturn(PETSC_SUCCESS);
1325 }
1326 
1327 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1328 {
1329   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1330   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1331   const PetscScalar *x, *v;
1332   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1333   const PetscInt     m = b->AIJ->rmap->n, *idx;
1334   PetscInt           n, i;
1335 
1336   PetscFunctionBegin;
1337   if (yy != zz) PetscCall(VecCopy(yy, zz));
1338   PetscCall(VecGetArrayRead(xx, &x));
1339   PetscCall(VecGetArray(zz, &y));
1340   for (i = 0; i < m; i++) {
1341     idx    = a->j + a->i[i];
1342     v      = a->a + a->i[i];
1343     n      = a->i[i + 1] - a->i[i];
1344     alpha1 = x[8 * i];
1345     alpha2 = x[8 * i + 1];
1346     alpha3 = x[8 * i + 2];
1347     alpha4 = x[8 * i + 3];
1348     alpha5 = x[8 * i + 4];
1349     alpha6 = x[8 * i + 5];
1350     alpha7 = x[8 * i + 6];
1351     alpha8 = x[8 * i + 7];
1352     while (n-- > 0) {
1353       y[8 * (*idx)] += alpha1 * (*v);
1354       y[8 * (*idx) + 1] += alpha2 * (*v);
1355       y[8 * (*idx) + 2] += alpha3 * (*v);
1356       y[8 * (*idx) + 3] += alpha4 * (*v);
1357       y[8 * (*idx) + 4] += alpha5 * (*v);
1358       y[8 * (*idx) + 5] += alpha6 * (*v);
1359       y[8 * (*idx) + 6] += alpha7 * (*v);
1360       y[8 * (*idx) + 7] += alpha8 * (*v);
1361       idx++;
1362       v++;
1363     }
1364   }
1365   PetscCall(PetscLogFlops(16.0 * a->nz));
1366   PetscCall(VecRestoreArrayRead(xx, &x));
1367   PetscCall(VecRestoreArray(zz, &y));
1368   PetscFunctionReturn(PETSC_SUCCESS);
1369 }
1370 
1371 /* ------------------------------------------------------------------------------*/
1372 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1373 {
1374   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1375   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1376   const PetscScalar *x, *v;
1377   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1378   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1379   PetscInt           nonzerorow = 0, n, i, jrow, j;
1380 
1381   PetscFunctionBegin;
1382   PetscCall(VecGetArrayRead(xx, &x));
1383   PetscCall(VecGetArray(yy, &y));
1384   idx = a->j;
1385   v   = a->a;
1386   ii  = a->i;
1387 
1388   for (i = 0; i < m; i++) {
1389     jrow = ii[i];
1390     n    = ii[i + 1] - jrow;
1391     sum1 = 0.0;
1392     sum2 = 0.0;
1393     sum3 = 0.0;
1394     sum4 = 0.0;
1395     sum5 = 0.0;
1396     sum6 = 0.0;
1397     sum7 = 0.0;
1398     sum8 = 0.0;
1399     sum9 = 0.0;
1400 
1401     nonzerorow += (n > 0);
1402     for (j = 0; j < n; j++) {
1403       sum1 += v[jrow] * x[9 * idx[jrow]];
1404       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1405       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1406       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1407       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1408       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1409       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1410       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1411       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1412       jrow++;
1413     }
1414     y[9 * i]     = sum1;
1415     y[9 * i + 1] = sum2;
1416     y[9 * i + 2] = sum3;
1417     y[9 * i + 3] = sum4;
1418     y[9 * i + 4] = sum5;
1419     y[9 * i + 5] = sum6;
1420     y[9 * i + 6] = sum7;
1421     y[9 * i + 7] = sum8;
1422     y[9 * i + 8] = sum9;
1423   }
1424 
1425   PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow));
1426   PetscCall(VecRestoreArrayRead(xx, &x));
1427   PetscCall(VecRestoreArray(yy, &y));
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
1431 /* ------------------------------------------------------------------------------*/
1432 
1433 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1434 {
1435   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1436   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1437   const PetscScalar *x, *v;
1438   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1439   const PetscInt     m = b->AIJ->rmap->n, *idx;
1440   PetscInt           n, i;
1441 
1442   PetscFunctionBegin;
1443   PetscCall(VecSet(yy, 0.0));
1444   PetscCall(VecGetArrayRead(xx, &x));
1445   PetscCall(VecGetArray(yy, &y));
1446 
1447   for (i = 0; i < m; i++) {
1448     idx    = a->j + a->i[i];
1449     v      = a->a + a->i[i];
1450     n      = a->i[i + 1] - a->i[i];
1451     alpha1 = x[9 * i];
1452     alpha2 = x[9 * i + 1];
1453     alpha3 = x[9 * i + 2];
1454     alpha4 = x[9 * i + 3];
1455     alpha5 = x[9 * i + 4];
1456     alpha6 = x[9 * i + 5];
1457     alpha7 = x[9 * i + 6];
1458     alpha8 = x[9 * i + 7];
1459     alpha9 = x[9 * i + 8];
1460     while (n-- > 0) {
1461       y[9 * (*idx)] += alpha1 * (*v);
1462       y[9 * (*idx) + 1] += alpha2 * (*v);
1463       y[9 * (*idx) + 2] += alpha3 * (*v);
1464       y[9 * (*idx) + 3] += alpha4 * (*v);
1465       y[9 * (*idx) + 4] += alpha5 * (*v);
1466       y[9 * (*idx) + 5] += alpha6 * (*v);
1467       y[9 * (*idx) + 6] += alpha7 * (*v);
1468       y[9 * (*idx) + 7] += alpha8 * (*v);
1469       y[9 * (*idx) + 8] += alpha9 * (*v);
1470       idx++;
1471       v++;
1472     }
1473   }
1474   PetscCall(PetscLogFlops(18.0 * a->nz));
1475   PetscCall(VecRestoreArrayRead(xx, &x));
1476   PetscCall(VecRestoreArray(yy, &y));
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1481 {
1482   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1483   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1484   const PetscScalar *x, *v;
1485   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1486   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1487   PetscInt           n, i, jrow, j;
1488 
1489   PetscFunctionBegin;
1490   if (yy != zz) PetscCall(VecCopy(yy, zz));
1491   PetscCall(VecGetArrayRead(xx, &x));
1492   PetscCall(VecGetArray(zz, &y));
1493   idx = a->j;
1494   v   = a->a;
1495   ii  = a->i;
1496 
1497   for (i = 0; i < m; i++) {
1498     jrow = ii[i];
1499     n    = ii[i + 1] - jrow;
1500     sum1 = 0.0;
1501     sum2 = 0.0;
1502     sum3 = 0.0;
1503     sum4 = 0.0;
1504     sum5 = 0.0;
1505     sum6 = 0.0;
1506     sum7 = 0.0;
1507     sum8 = 0.0;
1508     sum9 = 0.0;
1509     for (j = 0; j < n; j++) {
1510       sum1 += v[jrow] * x[9 * idx[jrow]];
1511       sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1512       sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1513       sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1514       sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1515       sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1516       sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1517       sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1518       sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1519       jrow++;
1520     }
1521     y[9 * i] += sum1;
1522     y[9 * i + 1] += sum2;
1523     y[9 * i + 2] += sum3;
1524     y[9 * i + 3] += sum4;
1525     y[9 * i + 4] += sum5;
1526     y[9 * i + 5] += sum6;
1527     y[9 * i + 6] += sum7;
1528     y[9 * i + 7] += sum8;
1529     y[9 * i + 8] += sum9;
1530   }
1531 
1532   PetscCall(PetscLogFlops(18.0 * a->nz));
1533   PetscCall(VecRestoreArrayRead(xx, &x));
1534   PetscCall(VecRestoreArray(zz, &y));
1535   PetscFunctionReturn(PETSC_SUCCESS);
1536 }
1537 
1538 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1539 {
1540   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1541   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1542   const PetscScalar *x, *v;
1543   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1544   const PetscInt     m = b->AIJ->rmap->n, *idx;
1545   PetscInt           n, i;
1546 
1547   PetscFunctionBegin;
1548   if (yy != zz) PetscCall(VecCopy(yy, zz));
1549   PetscCall(VecGetArrayRead(xx, &x));
1550   PetscCall(VecGetArray(zz, &y));
1551   for (i = 0; i < m; i++) {
1552     idx    = a->j + a->i[i];
1553     v      = a->a + a->i[i];
1554     n      = a->i[i + 1] - a->i[i];
1555     alpha1 = x[9 * i];
1556     alpha2 = x[9 * i + 1];
1557     alpha3 = x[9 * i + 2];
1558     alpha4 = x[9 * i + 3];
1559     alpha5 = x[9 * i + 4];
1560     alpha6 = x[9 * i + 5];
1561     alpha7 = x[9 * i + 6];
1562     alpha8 = x[9 * i + 7];
1563     alpha9 = x[9 * i + 8];
1564     while (n-- > 0) {
1565       y[9 * (*idx)] += alpha1 * (*v);
1566       y[9 * (*idx) + 1] += alpha2 * (*v);
1567       y[9 * (*idx) + 2] += alpha3 * (*v);
1568       y[9 * (*idx) + 3] += alpha4 * (*v);
1569       y[9 * (*idx) + 4] += alpha5 * (*v);
1570       y[9 * (*idx) + 5] += alpha6 * (*v);
1571       y[9 * (*idx) + 6] += alpha7 * (*v);
1572       y[9 * (*idx) + 7] += alpha8 * (*v);
1573       y[9 * (*idx) + 8] += alpha9 * (*v);
1574       idx++;
1575       v++;
1576     }
1577   }
1578   PetscCall(PetscLogFlops(18.0 * a->nz));
1579   PetscCall(VecRestoreArrayRead(xx, &x));
1580   PetscCall(VecRestoreArray(zz, &y));
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1584 {
1585   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1586   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1587   const PetscScalar *x, *v;
1588   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1589   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1590   PetscInt           nonzerorow = 0, n, i, jrow, j;
1591 
1592   PetscFunctionBegin;
1593   PetscCall(VecGetArrayRead(xx, &x));
1594   PetscCall(VecGetArray(yy, &y));
1595   idx = a->j;
1596   v   = a->a;
1597   ii  = a->i;
1598 
1599   for (i = 0; i < m; i++) {
1600     jrow  = ii[i];
1601     n     = ii[i + 1] - jrow;
1602     sum1  = 0.0;
1603     sum2  = 0.0;
1604     sum3  = 0.0;
1605     sum4  = 0.0;
1606     sum5  = 0.0;
1607     sum6  = 0.0;
1608     sum7  = 0.0;
1609     sum8  = 0.0;
1610     sum9  = 0.0;
1611     sum10 = 0.0;
1612 
1613     nonzerorow += (n > 0);
1614     for (j = 0; j < n; j++) {
1615       sum1 += v[jrow] * x[10 * idx[jrow]];
1616       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1617       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1618       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1619       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1620       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1621       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1622       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1623       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1624       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1625       jrow++;
1626     }
1627     y[10 * i]     = sum1;
1628     y[10 * i + 1] = sum2;
1629     y[10 * i + 2] = sum3;
1630     y[10 * i + 3] = sum4;
1631     y[10 * i + 4] = sum5;
1632     y[10 * i + 5] = sum6;
1633     y[10 * i + 6] = sum7;
1634     y[10 * i + 7] = sum8;
1635     y[10 * i + 8] = sum9;
1636     y[10 * i + 9] = sum10;
1637   }
1638 
1639   PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow));
1640   PetscCall(VecRestoreArrayRead(xx, &x));
1641   PetscCall(VecRestoreArray(yy, &y));
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1646 {
1647   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1648   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1649   const PetscScalar *x, *v;
1650   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1651   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1652   PetscInt           n, i, jrow, j;
1653 
1654   PetscFunctionBegin;
1655   if (yy != zz) PetscCall(VecCopy(yy, zz));
1656   PetscCall(VecGetArrayRead(xx, &x));
1657   PetscCall(VecGetArray(zz, &y));
1658   idx = a->j;
1659   v   = a->a;
1660   ii  = a->i;
1661 
1662   for (i = 0; i < m; i++) {
1663     jrow  = ii[i];
1664     n     = ii[i + 1] - jrow;
1665     sum1  = 0.0;
1666     sum2  = 0.0;
1667     sum3  = 0.0;
1668     sum4  = 0.0;
1669     sum5  = 0.0;
1670     sum6  = 0.0;
1671     sum7  = 0.0;
1672     sum8  = 0.0;
1673     sum9  = 0.0;
1674     sum10 = 0.0;
1675     for (j = 0; j < n; j++) {
1676       sum1 += v[jrow] * x[10 * idx[jrow]];
1677       sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1678       sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1679       sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1680       sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1681       sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1682       sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1683       sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1684       sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1685       sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1686       jrow++;
1687     }
1688     y[10 * i] += sum1;
1689     y[10 * i + 1] += sum2;
1690     y[10 * i + 2] += sum3;
1691     y[10 * i + 3] += sum4;
1692     y[10 * i + 4] += sum5;
1693     y[10 * i + 5] += sum6;
1694     y[10 * i + 6] += sum7;
1695     y[10 * i + 7] += sum8;
1696     y[10 * i + 8] += sum9;
1697     y[10 * i + 9] += sum10;
1698   }
1699 
1700   PetscCall(PetscLogFlops(20.0 * a->nz));
1701   PetscCall(VecRestoreArrayRead(xx, &x));
1702   PetscCall(VecRestoreArray(yy, &y));
1703   PetscFunctionReturn(PETSC_SUCCESS);
1704 }
1705 
1706 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1707 {
1708   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1709   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1710   const PetscScalar *x, *v;
1711   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1712   const PetscInt     m = b->AIJ->rmap->n, *idx;
1713   PetscInt           n, i;
1714 
1715   PetscFunctionBegin;
1716   PetscCall(VecSet(yy, 0.0));
1717   PetscCall(VecGetArrayRead(xx, &x));
1718   PetscCall(VecGetArray(yy, &y));
1719 
1720   for (i = 0; i < m; i++) {
1721     idx     = a->j + a->i[i];
1722     v       = a->a + a->i[i];
1723     n       = a->i[i + 1] - a->i[i];
1724     alpha1  = x[10 * i];
1725     alpha2  = x[10 * i + 1];
1726     alpha3  = x[10 * i + 2];
1727     alpha4  = x[10 * i + 3];
1728     alpha5  = x[10 * i + 4];
1729     alpha6  = x[10 * i + 5];
1730     alpha7  = x[10 * i + 6];
1731     alpha8  = x[10 * i + 7];
1732     alpha9  = x[10 * i + 8];
1733     alpha10 = x[10 * i + 9];
1734     while (n-- > 0) {
1735       y[10 * (*idx)] += alpha1 * (*v);
1736       y[10 * (*idx) + 1] += alpha2 * (*v);
1737       y[10 * (*idx) + 2] += alpha3 * (*v);
1738       y[10 * (*idx) + 3] += alpha4 * (*v);
1739       y[10 * (*idx) + 4] += alpha5 * (*v);
1740       y[10 * (*idx) + 5] += alpha6 * (*v);
1741       y[10 * (*idx) + 6] += alpha7 * (*v);
1742       y[10 * (*idx) + 7] += alpha8 * (*v);
1743       y[10 * (*idx) + 8] += alpha9 * (*v);
1744       y[10 * (*idx) + 9] += alpha10 * (*v);
1745       idx++;
1746       v++;
1747     }
1748   }
1749   PetscCall(PetscLogFlops(20.0 * a->nz));
1750   PetscCall(VecRestoreArrayRead(xx, &x));
1751   PetscCall(VecRestoreArray(yy, &y));
1752   PetscFunctionReturn(PETSC_SUCCESS);
1753 }
1754 
1755 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1756 {
1757   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1758   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1759   const PetscScalar *x, *v;
1760   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1761   const PetscInt     m = b->AIJ->rmap->n, *idx;
1762   PetscInt           n, i;
1763 
1764   PetscFunctionBegin;
1765   if (yy != zz) PetscCall(VecCopy(yy, zz));
1766   PetscCall(VecGetArrayRead(xx, &x));
1767   PetscCall(VecGetArray(zz, &y));
1768   for (i = 0; i < m; i++) {
1769     idx     = a->j + a->i[i];
1770     v       = a->a + a->i[i];
1771     n       = a->i[i + 1] - a->i[i];
1772     alpha1  = x[10 * i];
1773     alpha2  = x[10 * i + 1];
1774     alpha3  = x[10 * i + 2];
1775     alpha4  = x[10 * i + 3];
1776     alpha5  = x[10 * i + 4];
1777     alpha6  = x[10 * i + 5];
1778     alpha7  = x[10 * i + 6];
1779     alpha8  = x[10 * i + 7];
1780     alpha9  = x[10 * i + 8];
1781     alpha10 = x[10 * i + 9];
1782     while (n-- > 0) {
1783       y[10 * (*idx)] += alpha1 * (*v);
1784       y[10 * (*idx) + 1] += alpha2 * (*v);
1785       y[10 * (*idx) + 2] += alpha3 * (*v);
1786       y[10 * (*idx) + 3] += alpha4 * (*v);
1787       y[10 * (*idx) + 4] += alpha5 * (*v);
1788       y[10 * (*idx) + 5] += alpha6 * (*v);
1789       y[10 * (*idx) + 6] += alpha7 * (*v);
1790       y[10 * (*idx) + 7] += alpha8 * (*v);
1791       y[10 * (*idx) + 8] += alpha9 * (*v);
1792       y[10 * (*idx) + 9] += alpha10 * (*v);
1793       idx++;
1794       v++;
1795     }
1796   }
1797   PetscCall(PetscLogFlops(20.0 * a->nz));
1798   PetscCall(VecRestoreArrayRead(xx, &x));
1799   PetscCall(VecRestoreArray(zz, &y));
1800   PetscFunctionReturn(PETSC_SUCCESS);
1801 }
1802 
1803 /*--------------------------------------------------------------------------------------------*/
1804 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1805 {
1806   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1807   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1808   const PetscScalar *x, *v;
1809   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1810   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
1811   PetscInt           nonzerorow = 0, n, i, jrow, j;
1812 
1813   PetscFunctionBegin;
1814   PetscCall(VecGetArrayRead(xx, &x));
1815   PetscCall(VecGetArray(yy, &y));
1816   idx = a->j;
1817   v   = a->a;
1818   ii  = a->i;
1819 
1820   for (i = 0; i < m; i++) {
1821     jrow  = ii[i];
1822     n     = ii[i + 1] - jrow;
1823     sum1  = 0.0;
1824     sum2  = 0.0;
1825     sum3  = 0.0;
1826     sum4  = 0.0;
1827     sum5  = 0.0;
1828     sum6  = 0.0;
1829     sum7  = 0.0;
1830     sum8  = 0.0;
1831     sum9  = 0.0;
1832     sum10 = 0.0;
1833     sum11 = 0.0;
1834 
1835     nonzerorow += (n > 0);
1836     for (j = 0; j < n; j++) {
1837       sum1 += v[jrow] * x[11 * idx[jrow]];
1838       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1839       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1840       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1841       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1842       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1843       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1844       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1845       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1846       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1847       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1848       jrow++;
1849     }
1850     y[11 * i]      = sum1;
1851     y[11 * i + 1]  = sum2;
1852     y[11 * i + 2]  = sum3;
1853     y[11 * i + 3]  = sum4;
1854     y[11 * i + 4]  = sum5;
1855     y[11 * i + 5]  = sum6;
1856     y[11 * i + 6]  = sum7;
1857     y[11 * i + 7]  = sum8;
1858     y[11 * i + 8]  = sum9;
1859     y[11 * i + 9]  = sum10;
1860     y[11 * i + 10] = sum11;
1861   }
1862 
1863   PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow));
1864   PetscCall(VecRestoreArrayRead(xx, &x));
1865   PetscCall(VecRestoreArray(yy, &y));
1866   PetscFunctionReturn(PETSC_SUCCESS);
1867 }
1868 
1869 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1870 {
1871   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1872   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1873   const PetscScalar *x, *v;
1874   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1875   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
1876   PetscInt           n, i, jrow, j;
1877 
1878   PetscFunctionBegin;
1879   if (yy != zz) PetscCall(VecCopy(yy, zz));
1880   PetscCall(VecGetArrayRead(xx, &x));
1881   PetscCall(VecGetArray(zz, &y));
1882   idx = a->j;
1883   v   = a->a;
1884   ii  = a->i;
1885 
1886   for (i = 0; i < m; i++) {
1887     jrow  = ii[i];
1888     n     = ii[i + 1] - jrow;
1889     sum1  = 0.0;
1890     sum2  = 0.0;
1891     sum3  = 0.0;
1892     sum4  = 0.0;
1893     sum5  = 0.0;
1894     sum6  = 0.0;
1895     sum7  = 0.0;
1896     sum8  = 0.0;
1897     sum9  = 0.0;
1898     sum10 = 0.0;
1899     sum11 = 0.0;
1900     for (j = 0; j < n; j++) {
1901       sum1 += v[jrow] * x[11 * idx[jrow]];
1902       sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1903       sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1904       sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1905       sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1906       sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1907       sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1908       sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1909       sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1910       sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1911       sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1912       jrow++;
1913     }
1914     y[11 * i] += sum1;
1915     y[11 * i + 1] += sum2;
1916     y[11 * i + 2] += sum3;
1917     y[11 * i + 3] += sum4;
1918     y[11 * i + 4] += sum5;
1919     y[11 * i + 5] += sum6;
1920     y[11 * i + 6] += sum7;
1921     y[11 * i + 7] += sum8;
1922     y[11 * i + 8] += sum9;
1923     y[11 * i + 9] += sum10;
1924     y[11 * i + 10] += sum11;
1925   }
1926 
1927   PetscCall(PetscLogFlops(22.0 * a->nz));
1928   PetscCall(VecRestoreArrayRead(xx, &x));
1929   PetscCall(VecRestoreArray(yy, &y));
1930   PetscFunctionReturn(PETSC_SUCCESS);
1931 }
1932 
1933 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1934 {
1935   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1936   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1937   const PetscScalar *x, *v;
1938   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1939   const PetscInt     m = b->AIJ->rmap->n, *idx;
1940   PetscInt           n, i;
1941 
1942   PetscFunctionBegin;
1943   PetscCall(VecSet(yy, 0.0));
1944   PetscCall(VecGetArrayRead(xx, &x));
1945   PetscCall(VecGetArray(yy, &y));
1946 
1947   for (i = 0; i < m; i++) {
1948     idx     = a->j + a->i[i];
1949     v       = a->a + a->i[i];
1950     n       = a->i[i + 1] - a->i[i];
1951     alpha1  = x[11 * i];
1952     alpha2  = x[11 * i + 1];
1953     alpha3  = x[11 * i + 2];
1954     alpha4  = x[11 * i + 3];
1955     alpha5  = x[11 * i + 4];
1956     alpha6  = x[11 * i + 5];
1957     alpha7  = x[11 * i + 6];
1958     alpha8  = x[11 * i + 7];
1959     alpha9  = x[11 * i + 8];
1960     alpha10 = x[11 * i + 9];
1961     alpha11 = x[11 * i + 10];
1962     while (n-- > 0) {
1963       y[11 * (*idx)] += alpha1 * (*v);
1964       y[11 * (*idx) + 1] += alpha2 * (*v);
1965       y[11 * (*idx) + 2] += alpha3 * (*v);
1966       y[11 * (*idx) + 3] += alpha4 * (*v);
1967       y[11 * (*idx) + 4] += alpha5 * (*v);
1968       y[11 * (*idx) + 5] += alpha6 * (*v);
1969       y[11 * (*idx) + 6] += alpha7 * (*v);
1970       y[11 * (*idx) + 7] += alpha8 * (*v);
1971       y[11 * (*idx) + 8] += alpha9 * (*v);
1972       y[11 * (*idx) + 9] += alpha10 * (*v);
1973       y[11 * (*idx) + 10] += alpha11 * (*v);
1974       idx++;
1975       v++;
1976     }
1977   }
1978   PetscCall(PetscLogFlops(22.0 * a->nz));
1979   PetscCall(VecRestoreArrayRead(xx, &x));
1980   PetscCall(VecRestoreArray(yy, &y));
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1985 {
1986   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
1987   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
1988   const PetscScalar *x, *v;
1989   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1990   const PetscInt     m = b->AIJ->rmap->n, *idx;
1991   PetscInt           n, i;
1992 
1993   PetscFunctionBegin;
1994   if (yy != zz) PetscCall(VecCopy(yy, zz));
1995   PetscCall(VecGetArrayRead(xx, &x));
1996   PetscCall(VecGetArray(zz, &y));
1997   for (i = 0; i < m; i++) {
1998     idx     = a->j + a->i[i];
1999     v       = a->a + a->i[i];
2000     n       = a->i[i + 1] - a->i[i];
2001     alpha1  = x[11 * i];
2002     alpha2  = x[11 * i + 1];
2003     alpha3  = x[11 * i + 2];
2004     alpha4  = x[11 * i + 3];
2005     alpha5  = x[11 * i + 4];
2006     alpha6  = x[11 * i + 5];
2007     alpha7  = x[11 * i + 6];
2008     alpha8  = x[11 * i + 7];
2009     alpha9  = x[11 * i + 8];
2010     alpha10 = x[11 * i + 9];
2011     alpha11 = x[11 * i + 10];
2012     while (n-- > 0) {
2013       y[11 * (*idx)] += alpha1 * (*v);
2014       y[11 * (*idx) + 1] += alpha2 * (*v);
2015       y[11 * (*idx) + 2] += alpha3 * (*v);
2016       y[11 * (*idx) + 3] += alpha4 * (*v);
2017       y[11 * (*idx) + 4] += alpha5 * (*v);
2018       y[11 * (*idx) + 5] += alpha6 * (*v);
2019       y[11 * (*idx) + 6] += alpha7 * (*v);
2020       y[11 * (*idx) + 7] += alpha8 * (*v);
2021       y[11 * (*idx) + 8] += alpha9 * (*v);
2022       y[11 * (*idx) + 9] += alpha10 * (*v);
2023       y[11 * (*idx) + 10] += alpha11 * (*v);
2024       idx++;
2025       v++;
2026     }
2027   }
2028   PetscCall(PetscLogFlops(22.0 * a->nz));
2029   PetscCall(VecRestoreArrayRead(xx, &x));
2030   PetscCall(VecRestoreArray(zz, &y));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 /*--------------------------------------------------------------------------------------------*/
2035 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2036 {
2037   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2038   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2039   const PetscScalar *x, *v;
2040   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2041   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2042   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2043   PetscInt           nonzerorow = 0, n, i, jrow, j;
2044 
2045   PetscFunctionBegin;
2046   PetscCall(VecGetArrayRead(xx, &x));
2047   PetscCall(VecGetArray(yy, &y));
2048   idx = a->j;
2049   v   = a->a;
2050   ii  = a->i;
2051 
2052   for (i = 0; i < m; i++) {
2053     jrow  = ii[i];
2054     n     = ii[i + 1] - jrow;
2055     sum1  = 0.0;
2056     sum2  = 0.0;
2057     sum3  = 0.0;
2058     sum4  = 0.0;
2059     sum5  = 0.0;
2060     sum6  = 0.0;
2061     sum7  = 0.0;
2062     sum8  = 0.0;
2063     sum9  = 0.0;
2064     sum10 = 0.0;
2065     sum11 = 0.0;
2066     sum12 = 0.0;
2067     sum13 = 0.0;
2068     sum14 = 0.0;
2069     sum15 = 0.0;
2070     sum16 = 0.0;
2071 
2072     nonzerorow += (n > 0);
2073     for (j = 0; j < n; j++) {
2074       sum1 += v[jrow] * x[16 * idx[jrow]];
2075       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2076       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2077       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2078       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2079       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2080       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2081       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2082       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2083       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2084       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2085       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2086       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2087       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2088       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2089       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2090       jrow++;
2091     }
2092     y[16 * i]      = sum1;
2093     y[16 * i + 1]  = sum2;
2094     y[16 * i + 2]  = sum3;
2095     y[16 * i + 3]  = sum4;
2096     y[16 * i + 4]  = sum5;
2097     y[16 * i + 5]  = sum6;
2098     y[16 * i + 6]  = sum7;
2099     y[16 * i + 7]  = sum8;
2100     y[16 * i + 8]  = sum9;
2101     y[16 * i + 9]  = sum10;
2102     y[16 * i + 10] = sum11;
2103     y[16 * i + 11] = sum12;
2104     y[16 * i + 12] = sum13;
2105     y[16 * i + 13] = sum14;
2106     y[16 * i + 14] = sum15;
2107     y[16 * i + 15] = sum16;
2108   }
2109 
2110   PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow));
2111   PetscCall(VecRestoreArrayRead(xx, &x));
2112   PetscCall(VecRestoreArray(yy, &y));
2113   PetscFunctionReturn(PETSC_SUCCESS);
2114 }
2115 
2116 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2117 {
2118   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2119   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2120   const PetscScalar *x, *v;
2121   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2122   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2123   const PetscInt     m = b->AIJ->rmap->n, *idx;
2124   PetscInt           n, i;
2125 
2126   PetscFunctionBegin;
2127   PetscCall(VecSet(yy, 0.0));
2128   PetscCall(VecGetArrayRead(xx, &x));
2129   PetscCall(VecGetArray(yy, &y));
2130 
2131   for (i = 0; i < m; i++) {
2132     idx     = a->j + a->i[i];
2133     v       = a->a + a->i[i];
2134     n       = a->i[i + 1] - a->i[i];
2135     alpha1  = x[16 * i];
2136     alpha2  = x[16 * i + 1];
2137     alpha3  = x[16 * i + 2];
2138     alpha4  = x[16 * i + 3];
2139     alpha5  = x[16 * i + 4];
2140     alpha6  = x[16 * i + 5];
2141     alpha7  = x[16 * i + 6];
2142     alpha8  = x[16 * i + 7];
2143     alpha9  = x[16 * i + 8];
2144     alpha10 = x[16 * i + 9];
2145     alpha11 = x[16 * i + 10];
2146     alpha12 = x[16 * i + 11];
2147     alpha13 = x[16 * i + 12];
2148     alpha14 = x[16 * i + 13];
2149     alpha15 = x[16 * i + 14];
2150     alpha16 = x[16 * i + 15];
2151     while (n-- > 0) {
2152       y[16 * (*idx)] += alpha1 * (*v);
2153       y[16 * (*idx) + 1] += alpha2 * (*v);
2154       y[16 * (*idx) + 2] += alpha3 * (*v);
2155       y[16 * (*idx) + 3] += alpha4 * (*v);
2156       y[16 * (*idx) + 4] += alpha5 * (*v);
2157       y[16 * (*idx) + 5] += alpha6 * (*v);
2158       y[16 * (*idx) + 6] += alpha7 * (*v);
2159       y[16 * (*idx) + 7] += alpha8 * (*v);
2160       y[16 * (*idx) + 8] += alpha9 * (*v);
2161       y[16 * (*idx) + 9] += alpha10 * (*v);
2162       y[16 * (*idx) + 10] += alpha11 * (*v);
2163       y[16 * (*idx) + 11] += alpha12 * (*v);
2164       y[16 * (*idx) + 12] += alpha13 * (*v);
2165       y[16 * (*idx) + 13] += alpha14 * (*v);
2166       y[16 * (*idx) + 14] += alpha15 * (*v);
2167       y[16 * (*idx) + 15] += alpha16 * (*v);
2168       idx++;
2169       v++;
2170     }
2171   }
2172   PetscCall(PetscLogFlops(32.0 * a->nz));
2173   PetscCall(VecRestoreArrayRead(xx, &x));
2174   PetscCall(VecRestoreArray(yy, &y));
2175   PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177 
2178 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2179 {
2180   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2181   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2182   const PetscScalar *x, *v;
2183   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2184   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2185   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2186   PetscInt           n, i, jrow, j;
2187 
2188   PetscFunctionBegin;
2189   if (yy != zz) PetscCall(VecCopy(yy, zz));
2190   PetscCall(VecGetArrayRead(xx, &x));
2191   PetscCall(VecGetArray(zz, &y));
2192   idx = a->j;
2193   v   = a->a;
2194   ii  = a->i;
2195 
2196   for (i = 0; i < m; i++) {
2197     jrow  = ii[i];
2198     n     = ii[i + 1] - jrow;
2199     sum1  = 0.0;
2200     sum2  = 0.0;
2201     sum3  = 0.0;
2202     sum4  = 0.0;
2203     sum5  = 0.0;
2204     sum6  = 0.0;
2205     sum7  = 0.0;
2206     sum8  = 0.0;
2207     sum9  = 0.0;
2208     sum10 = 0.0;
2209     sum11 = 0.0;
2210     sum12 = 0.0;
2211     sum13 = 0.0;
2212     sum14 = 0.0;
2213     sum15 = 0.0;
2214     sum16 = 0.0;
2215     for (j = 0; j < n; j++) {
2216       sum1 += v[jrow] * x[16 * idx[jrow]];
2217       sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2218       sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2219       sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2220       sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2221       sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2222       sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2223       sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2224       sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2225       sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2226       sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2227       sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2228       sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2229       sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2230       sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2231       sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2232       jrow++;
2233     }
2234     y[16 * i] += sum1;
2235     y[16 * i + 1] += sum2;
2236     y[16 * i + 2] += sum3;
2237     y[16 * i + 3] += sum4;
2238     y[16 * i + 4] += sum5;
2239     y[16 * i + 5] += sum6;
2240     y[16 * i + 6] += sum7;
2241     y[16 * i + 7] += sum8;
2242     y[16 * i + 8] += sum9;
2243     y[16 * i + 9] += sum10;
2244     y[16 * i + 10] += sum11;
2245     y[16 * i + 11] += sum12;
2246     y[16 * i + 12] += sum13;
2247     y[16 * i + 13] += sum14;
2248     y[16 * i + 14] += sum15;
2249     y[16 * i + 15] += sum16;
2250   }
2251 
2252   PetscCall(PetscLogFlops(32.0 * a->nz));
2253   PetscCall(VecRestoreArrayRead(xx, &x));
2254   PetscCall(VecRestoreArray(zz, &y));
2255   PetscFunctionReturn(PETSC_SUCCESS);
2256 }
2257 
2258 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2259 {
2260   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2261   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2262   const PetscScalar *x, *v;
2263   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2264   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2265   const PetscInt     m = b->AIJ->rmap->n, *idx;
2266   PetscInt           n, i;
2267 
2268   PetscFunctionBegin;
2269   if (yy != zz) PetscCall(VecCopy(yy, zz));
2270   PetscCall(VecGetArrayRead(xx, &x));
2271   PetscCall(VecGetArray(zz, &y));
2272   for (i = 0; i < m; i++) {
2273     idx     = a->j + a->i[i];
2274     v       = a->a + a->i[i];
2275     n       = a->i[i + 1] - a->i[i];
2276     alpha1  = x[16 * i];
2277     alpha2  = x[16 * i + 1];
2278     alpha3  = x[16 * i + 2];
2279     alpha4  = x[16 * i + 3];
2280     alpha5  = x[16 * i + 4];
2281     alpha6  = x[16 * i + 5];
2282     alpha7  = x[16 * i + 6];
2283     alpha8  = x[16 * i + 7];
2284     alpha9  = x[16 * i + 8];
2285     alpha10 = x[16 * i + 9];
2286     alpha11 = x[16 * i + 10];
2287     alpha12 = x[16 * i + 11];
2288     alpha13 = x[16 * i + 12];
2289     alpha14 = x[16 * i + 13];
2290     alpha15 = x[16 * i + 14];
2291     alpha16 = x[16 * i + 15];
2292     while (n-- > 0) {
2293       y[16 * (*idx)] += alpha1 * (*v);
2294       y[16 * (*idx) + 1] += alpha2 * (*v);
2295       y[16 * (*idx) + 2] += alpha3 * (*v);
2296       y[16 * (*idx) + 3] += alpha4 * (*v);
2297       y[16 * (*idx) + 4] += alpha5 * (*v);
2298       y[16 * (*idx) + 5] += alpha6 * (*v);
2299       y[16 * (*idx) + 6] += alpha7 * (*v);
2300       y[16 * (*idx) + 7] += alpha8 * (*v);
2301       y[16 * (*idx) + 8] += alpha9 * (*v);
2302       y[16 * (*idx) + 9] += alpha10 * (*v);
2303       y[16 * (*idx) + 10] += alpha11 * (*v);
2304       y[16 * (*idx) + 11] += alpha12 * (*v);
2305       y[16 * (*idx) + 12] += alpha13 * (*v);
2306       y[16 * (*idx) + 13] += alpha14 * (*v);
2307       y[16 * (*idx) + 14] += alpha15 * (*v);
2308       y[16 * (*idx) + 15] += alpha16 * (*v);
2309       idx++;
2310       v++;
2311     }
2312   }
2313   PetscCall(PetscLogFlops(32.0 * a->nz));
2314   PetscCall(VecRestoreArrayRead(xx, &x));
2315   PetscCall(VecRestoreArray(zz, &y));
2316   PetscFunctionReturn(PETSC_SUCCESS);
2317 }
2318 
2319 /*--------------------------------------------------------------------------------------------*/
2320 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2321 {
2322   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2323   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2324   const PetscScalar *x, *v;
2325   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2326   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2327   const PetscInt     m          = b->AIJ->rmap->n, *idx, *ii;
2328   PetscInt           nonzerorow = 0, n, i, jrow, j;
2329 
2330   PetscFunctionBegin;
2331   PetscCall(VecGetArrayRead(xx, &x));
2332   PetscCall(VecGetArray(yy, &y));
2333   idx = a->j;
2334   v   = a->a;
2335   ii  = a->i;
2336 
2337   for (i = 0; i < m; i++) {
2338     jrow  = ii[i];
2339     n     = ii[i + 1] - jrow;
2340     sum1  = 0.0;
2341     sum2  = 0.0;
2342     sum3  = 0.0;
2343     sum4  = 0.0;
2344     sum5  = 0.0;
2345     sum6  = 0.0;
2346     sum7  = 0.0;
2347     sum8  = 0.0;
2348     sum9  = 0.0;
2349     sum10 = 0.0;
2350     sum11 = 0.0;
2351     sum12 = 0.0;
2352     sum13 = 0.0;
2353     sum14 = 0.0;
2354     sum15 = 0.0;
2355     sum16 = 0.0;
2356     sum17 = 0.0;
2357     sum18 = 0.0;
2358 
2359     nonzerorow += (n > 0);
2360     for (j = 0; j < n; j++) {
2361       sum1 += v[jrow] * x[18 * idx[jrow]];
2362       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2363       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2364       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2365       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2366       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2367       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2368       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2369       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2370       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2371       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2372       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2373       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2374       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2375       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2376       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2377       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2378       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2379       jrow++;
2380     }
2381     y[18 * i]      = sum1;
2382     y[18 * i + 1]  = sum2;
2383     y[18 * i + 2]  = sum3;
2384     y[18 * i + 3]  = sum4;
2385     y[18 * i + 4]  = sum5;
2386     y[18 * i + 5]  = sum6;
2387     y[18 * i + 6]  = sum7;
2388     y[18 * i + 7]  = sum8;
2389     y[18 * i + 8]  = sum9;
2390     y[18 * i + 9]  = sum10;
2391     y[18 * i + 10] = sum11;
2392     y[18 * i + 11] = sum12;
2393     y[18 * i + 12] = sum13;
2394     y[18 * i + 13] = sum14;
2395     y[18 * i + 14] = sum15;
2396     y[18 * i + 15] = sum16;
2397     y[18 * i + 16] = sum17;
2398     y[18 * i + 17] = sum18;
2399   }
2400 
2401   PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow));
2402   PetscCall(VecRestoreArrayRead(xx, &x));
2403   PetscCall(VecRestoreArray(yy, &y));
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2408 {
2409   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2410   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2411   const PetscScalar *x, *v;
2412   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2413   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2414   const PetscInt     m = b->AIJ->rmap->n, *idx;
2415   PetscInt           n, i;
2416 
2417   PetscFunctionBegin;
2418   PetscCall(VecSet(yy, 0.0));
2419   PetscCall(VecGetArrayRead(xx, &x));
2420   PetscCall(VecGetArray(yy, &y));
2421 
2422   for (i = 0; i < m; i++) {
2423     idx     = a->j + a->i[i];
2424     v       = a->a + a->i[i];
2425     n       = a->i[i + 1] - a->i[i];
2426     alpha1  = x[18 * i];
2427     alpha2  = x[18 * i + 1];
2428     alpha3  = x[18 * i + 2];
2429     alpha4  = x[18 * i + 3];
2430     alpha5  = x[18 * i + 4];
2431     alpha6  = x[18 * i + 5];
2432     alpha7  = x[18 * i + 6];
2433     alpha8  = x[18 * i + 7];
2434     alpha9  = x[18 * i + 8];
2435     alpha10 = x[18 * i + 9];
2436     alpha11 = x[18 * i + 10];
2437     alpha12 = x[18 * i + 11];
2438     alpha13 = x[18 * i + 12];
2439     alpha14 = x[18 * i + 13];
2440     alpha15 = x[18 * i + 14];
2441     alpha16 = x[18 * i + 15];
2442     alpha17 = x[18 * i + 16];
2443     alpha18 = x[18 * i + 17];
2444     while (n-- > 0) {
2445       y[18 * (*idx)] += alpha1 * (*v);
2446       y[18 * (*idx) + 1] += alpha2 * (*v);
2447       y[18 * (*idx) + 2] += alpha3 * (*v);
2448       y[18 * (*idx) + 3] += alpha4 * (*v);
2449       y[18 * (*idx) + 4] += alpha5 * (*v);
2450       y[18 * (*idx) + 5] += alpha6 * (*v);
2451       y[18 * (*idx) + 6] += alpha7 * (*v);
2452       y[18 * (*idx) + 7] += alpha8 * (*v);
2453       y[18 * (*idx) + 8] += alpha9 * (*v);
2454       y[18 * (*idx) + 9] += alpha10 * (*v);
2455       y[18 * (*idx) + 10] += alpha11 * (*v);
2456       y[18 * (*idx) + 11] += alpha12 * (*v);
2457       y[18 * (*idx) + 12] += alpha13 * (*v);
2458       y[18 * (*idx) + 13] += alpha14 * (*v);
2459       y[18 * (*idx) + 14] += alpha15 * (*v);
2460       y[18 * (*idx) + 15] += alpha16 * (*v);
2461       y[18 * (*idx) + 16] += alpha17 * (*v);
2462       y[18 * (*idx) + 17] += alpha18 * (*v);
2463       idx++;
2464       v++;
2465     }
2466   }
2467   PetscCall(PetscLogFlops(36.0 * a->nz));
2468   PetscCall(VecRestoreArrayRead(xx, &x));
2469   PetscCall(VecRestoreArray(yy, &y));
2470   PetscFunctionReturn(PETSC_SUCCESS);
2471 }
2472 
2473 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2474 {
2475   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2476   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2477   const PetscScalar *x, *v;
2478   PetscScalar       *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2479   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2480   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2481   PetscInt           n, i, jrow, j;
2482 
2483   PetscFunctionBegin;
2484   if (yy != zz) PetscCall(VecCopy(yy, zz));
2485   PetscCall(VecGetArrayRead(xx, &x));
2486   PetscCall(VecGetArray(zz, &y));
2487   idx = a->j;
2488   v   = a->a;
2489   ii  = a->i;
2490 
2491   for (i = 0; i < m; i++) {
2492     jrow  = ii[i];
2493     n     = ii[i + 1] - jrow;
2494     sum1  = 0.0;
2495     sum2  = 0.0;
2496     sum3  = 0.0;
2497     sum4  = 0.0;
2498     sum5  = 0.0;
2499     sum6  = 0.0;
2500     sum7  = 0.0;
2501     sum8  = 0.0;
2502     sum9  = 0.0;
2503     sum10 = 0.0;
2504     sum11 = 0.0;
2505     sum12 = 0.0;
2506     sum13 = 0.0;
2507     sum14 = 0.0;
2508     sum15 = 0.0;
2509     sum16 = 0.0;
2510     sum17 = 0.0;
2511     sum18 = 0.0;
2512     for (j = 0; j < n; j++) {
2513       sum1 += v[jrow] * x[18 * idx[jrow]];
2514       sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2515       sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2516       sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2517       sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2518       sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2519       sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2520       sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2521       sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2522       sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2523       sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2524       sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2525       sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2526       sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2527       sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2528       sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2529       sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2530       sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2531       jrow++;
2532     }
2533     y[18 * i] += sum1;
2534     y[18 * i + 1] += sum2;
2535     y[18 * i + 2] += sum3;
2536     y[18 * i + 3] += sum4;
2537     y[18 * i + 4] += sum5;
2538     y[18 * i + 5] += sum6;
2539     y[18 * i + 6] += sum7;
2540     y[18 * i + 7] += sum8;
2541     y[18 * i + 8] += sum9;
2542     y[18 * i + 9] += sum10;
2543     y[18 * i + 10] += sum11;
2544     y[18 * i + 11] += sum12;
2545     y[18 * i + 12] += sum13;
2546     y[18 * i + 13] += sum14;
2547     y[18 * i + 14] += sum15;
2548     y[18 * i + 15] += sum16;
2549     y[18 * i + 16] += sum17;
2550     y[18 * i + 17] += sum18;
2551   }
2552 
2553   PetscCall(PetscLogFlops(36.0 * a->nz));
2554   PetscCall(VecRestoreArrayRead(xx, &x));
2555   PetscCall(VecRestoreArray(zz, &y));
2556   PetscFunctionReturn(PETSC_SUCCESS);
2557 }
2558 
2559 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2560 {
2561   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2562   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2563   const PetscScalar *x, *v;
2564   PetscScalar       *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2565   PetscScalar        alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2566   const PetscInt     m = b->AIJ->rmap->n, *idx;
2567   PetscInt           n, i;
2568 
2569   PetscFunctionBegin;
2570   if (yy != zz) PetscCall(VecCopy(yy, zz));
2571   PetscCall(VecGetArrayRead(xx, &x));
2572   PetscCall(VecGetArray(zz, &y));
2573   for (i = 0; i < m; i++) {
2574     idx     = a->j + a->i[i];
2575     v       = a->a + a->i[i];
2576     n       = a->i[i + 1] - a->i[i];
2577     alpha1  = x[18 * i];
2578     alpha2  = x[18 * i + 1];
2579     alpha3  = x[18 * i + 2];
2580     alpha4  = x[18 * i + 3];
2581     alpha5  = x[18 * i + 4];
2582     alpha6  = x[18 * i + 5];
2583     alpha7  = x[18 * i + 6];
2584     alpha8  = x[18 * i + 7];
2585     alpha9  = x[18 * i + 8];
2586     alpha10 = x[18 * i + 9];
2587     alpha11 = x[18 * i + 10];
2588     alpha12 = x[18 * i + 11];
2589     alpha13 = x[18 * i + 12];
2590     alpha14 = x[18 * i + 13];
2591     alpha15 = x[18 * i + 14];
2592     alpha16 = x[18 * i + 15];
2593     alpha17 = x[18 * i + 16];
2594     alpha18 = x[18 * i + 17];
2595     while (n-- > 0) {
2596       y[18 * (*idx)] += alpha1 * (*v);
2597       y[18 * (*idx) + 1] += alpha2 * (*v);
2598       y[18 * (*idx) + 2] += alpha3 * (*v);
2599       y[18 * (*idx) + 3] += alpha4 * (*v);
2600       y[18 * (*idx) + 4] += alpha5 * (*v);
2601       y[18 * (*idx) + 5] += alpha6 * (*v);
2602       y[18 * (*idx) + 6] += alpha7 * (*v);
2603       y[18 * (*idx) + 7] += alpha8 * (*v);
2604       y[18 * (*idx) + 8] += alpha9 * (*v);
2605       y[18 * (*idx) + 9] += alpha10 * (*v);
2606       y[18 * (*idx) + 10] += alpha11 * (*v);
2607       y[18 * (*idx) + 11] += alpha12 * (*v);
2608       y[18 * (*idx) + 12] += alpha13 * (*v);
2609       y[18 * (*idx) + 13] += alpha14 * (*v);
2610       y[18 * (*idx) + 14] += alpha15 * (*v);
2611       y[18 * (*idx) + 15] += alpha16 * (*v);
2612       y[18 * (*idx) + 16] += alpha17 * (*v);
2613       y[18 * (*idx) + 17] += alpha18 * (*v);
2614       idx++;
2615       v++;
2616     }
2617   }
2618   PetscCall(PetscLogFlops(36.0 * a->nz));
2619   PetscCall(VecRestoreArrayRead(xx, &x));
2620   PetscCall(VecRestoreArray(zz, &y));
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2625 {
2626   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2627   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2628   const PetscScalar *x, *v;
2629   PetscScalar       *y, *sums;
2630   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2631   PetscInt           n, i, jrow, j, dof = b->dof, k;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(VecGetArrayRead(xx, &x));
2635   PetscCall(VecSet(yy, 0.0));
2636   PetscCall(VecGetArray(yy, &y));
2637   idx = a->j;
2638   v   = a->a;
2639   ii  = a->i;
2640 
2641   for (i = 0; i < m; i++) {
2642     jrow = ii[i];
2643     n    = ii[i + 1] - jrow;
2644     sums = y + dof * i;
2645     for (j = 0; j < n; j++) {
2646       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2647       jrow++;
2648     }
2649   }
2650 
2651   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2652   PetscCall(VecRestoreArrayRead(xx, &x));
2653   PetscCall(VecRestoreArray(yy, &y));
2654   PetscFunctionReturn(PETSC_SUCCESS);
2655 }
2656 
2657 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2658 {
2659   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2660   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2661   const PetscScalar *x, *v;
2662   PetscScalar       *y, *sums;
2663   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
2664   PetscInt           n, i, jrow, j, dof = b->dof, k;
2665 
2666   PetscFunctionBegin;
2667   if (yy != zz) PetscCall(VecCopy(yy, zz));
2668   PetscCall(VecGetArrayRead(xx, &x));
2669   PetscCall(VecGetArray(zz, &y));
2670   idx = a->j;
2671   v   = a->a;
2672   ii  = a->i;
2673 
2674   for (i = 0; i < m; i++) {
2675     jrow = ii[i];
2676     n    = ii[i + 1] - jrow;
2677     sums = y + dof * i;
2678     for (j = 0; j < n; j++) {
2679       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2680       jrow++;
2681     }
2682   }
2683 
2684   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2685   PetscCall(VecRestoreArrayRead(xx, &x));
2686   PetscCall(VecRestoreArray(zz, &y));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2691 {
2692   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2693   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2694   const PetscScalar *x, *v, *alpha;
2695   PetscScalar       *y;
2696   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2697   PetscInt           n, i, k;
2698 
2699   PetscFunctionBegin;
2700   PetscCall(VecGetArrayRead(xx, &x));
2701   PetscCall(VecSet(yy, 0.0));
2702   PetscCall(VecGetArray(yy, &y));
2703   for (i = 0; i < m; i++) {
2704     idx   = a->j + a->i[i];
2705     v     = a->a + a->i[i];
2706     n     = a->i[i + 1] - a->i[i];
2707     alpha = x + dof * i;
2708     while (n-- > 0) {
2709       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2710       idx++;
2711       v++;
2712     }
2713   }
2714   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2715   PetscCall(VecRestoreArrayRead(xx, &x));
2716   PetscCall(VecRestoreArray(yy, &y));
2717   PetscFunctionReturn(PETSC_SUCCESS);
2718 }
2719 
2720 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2721 {
2722   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
2723   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
2724   const PetscScalar *x, *v, *alpha;
2725   PetscScalar       *y;
2726   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
2727   PetscInt           n, i, k;
2728 
2729   PetscFunctionBegin;
2730   if (yy != zz) PetscCall(VecCopy(yy, zz));
2731   PetscCall(VecGetArrayRead(xx, &x));
2732   PetscCall(VecGetArray(zz, &y));
2733   for (i = 0; i < m; i++) {
2734     idx   = a->j + a->i[i];
2735     v     = a->a + a->i[i];
2736     n     = a->i[i + 1] - a->i[i];
2737     alpha = x + dof * i;
2738     while (n-- > 0) {
2739       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2740       idx++;
2741       v++;
2742     }
2743   }
2744   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
2745   PetscCall(VecRestoreArrayRead(xx, &x));
2746   PetscCall(VecRestoreArray(zz, &y));
2747   PetscFunctionReturn(PETSC_SUCCESS);
2748 }
2749 
2750 /*===================================================================================*/
2751 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2752 {
2753   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2754 
2755   PetscFunctionBegin;
2756   /* start the scatter */
2757   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2758   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
2759   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2760   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
2761   PetscFunctionReturn(PETSC_SUCCESS);
2762 }
2763 
2764 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2765 {
2766   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2767 
2768   PetscFunctionBegin;
2769   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2770   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
2771   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2772   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
2773   PetscFunctionReturn(PETSC_SUCCESS);
2774 }
2775 
2776 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2777 {
2778   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2779 
2780   PetscFunctionBegin;
2781   /* start the scatter */
2782   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2783   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
2784   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
2785   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
2786   PetscFunctionReturn(PETSC_SUCCESS);
2787 }
2788 
2789 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2790 {
2791   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2792 
2793   PetscFunctionBegin;
2794   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
2795   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
2796   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2797   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
2798   PetscFunctionReturn(PETSC_SUCCESS);
2799 }
2800 
2801 /* ----------------------------------------------------------------*/
2802 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2803 {
2804   Mat_Product *product = C->product;
2805 
2806   PetscFunctionBegin;
2807   if (product->type == MATPRODUCT_PtAP) {
2808     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2809   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
2810   PetscFunctionReturn(PETSC_SUCCESS);
2811 }
2812 
2813 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2814 {
2815   Mat_Product *product = C->product;
2816   PetscBool    flg     = PETSC_FALSE;
2817   Mat          A = product->A, P = product->B;
2818   PetscInt     alg = 1; /* set default algorithm */
2819 #if !defined(PETSC_HAVE_HYPRE)
2820   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
2821   PetscInt    nalg        = 4;
2822 #else
2823   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
2824   PetscInt    nalg        = 5;
2825 #endif
2826 
2827   PetscFunctionBegin;
2828   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
2829 
2830   /* PtAP */
2831   /* Check matrix local sizes */
2832   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2833              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
2834   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2835              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
2836 
2837   /* Set the default algorithm */
2838   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2839   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2840 
2841   /* Get runtime option */
2842   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2843   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
2844   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2845   PetscOptionsEnd();
2846 
2847   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
2848   if (flg) {
2849     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2850     PetscFunctionReturn(PETSC_SUCCESS);
2851   }
2852 
2853   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
2854   if (flg) {
2855     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2856     PetscFunctionReturn(PETSC_SUCCESS);
2857   }
2858 
2859   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2860   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
2861   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
2862   PetscCall(MatProductSetFromOptions(C));
2863   PetscFunctionReturn(PETSC_SUCCESS);
2864 }
2865 
2866 /* ----------------------------------------------------------------*/
2867 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
2868 {
2869   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2870   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
2871   Mat                P  = pp->AIJ;
2872   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2873   PetscInt          *pti, *ptj, *ptJ;
2874   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2875   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2876   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
2877   MatScalar         *ca;
2878   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
2879 
2880   PetscFunctionBegin;
2881   /* Get ij structure of P^T */
2882   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2883 
2884   cn = pn * ppdof;
2885   /* Allocate ci array, arrays for fill computation and */
2886   /* free space for accumulating nonzero column info */
2887   PetscCall(PetscMalloc1(cn + 1, &ci));
2888   ci[0] = 0;
2889 
2890   /* Work arrays for rows of P^T*A */
2891   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
2892   PetscCall(PetscArrayzero(ptadenserow, an));
2893   PetscCall(PetscArrayzero(denserow, cn));
2894 
2895   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2896   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2897   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2898   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
2899   current_space = free_space;
2900 
2901   /* Determine symbolic info for each row of C: */
2902   for (i = 0; i < pn; i++) {
2903     ptnzi = pti[i + 1] - pti[i];
2904     ptJ   = ptj + pti[i];
2905     for (dof = 0; dof < ppdof; dof++) {
2906       ptanzi = 0;
2907       /* Determine symbolic row of PtA: */
2908       for (j = 0; j < ptnzi; j++) {
2909         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2910         arow = ptJ[j] * ppdof + dof;
2911         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2912         anzj = ai[arow + 1] - ai[arow];
2913         ajj  = aj + ai[arow];
2914         for (k = 0; k < anzj; k++) {
2915           if (!ptadenserow[ajj[k]]) {
2916             ptadenserow[ajj[k]]    = -1;
2917             ptasparserow[ptanzi++] = ajj[k];
2918           }
2919         }
2920       }
2921       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2922       ptaj = ptasparserow;
2923       cnzi = 0;
2924       for (j = 0; j < ptanzi; j++) {
2925         /* Get offset within block of P */
2926         pshift = *ptaj % ppdof;
2927         /* Get block row of P */
2928         prow = (*ptaj++) / ppdof; /* integer division */
2929         /* P has same number of nonzeros per row as the compressed form */
2930         pnzj = pi[prow + 1] - pi[prow];
2931         pjj  = pj + pi[prow];
2932         for (k = 0; k < pnzj; k++) {
2933           /* Locations in C are shifted by the offset within the block */
2934           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2935           if (!denserow[pjj[k] * ppdof + pshift]) {
2936             denserow[pjj[k] * ppdof + pshift] = -1;
2937             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
2938           }
2939         }
2940       }
2941 
2942       /* sort sparserow */
2943       PetscCall(PetscSortInt(cnzi, sparserow));
2944 
2945       /* If free space is not available, make more free space */
2946       /* Double the amount of total space in the list */
2947       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
2948 
2949       /* Copy data into free space, and zero out denserows */
2950       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
2951 
2952       current_space->array += cnzi;
2953       current_space->local_used += cnzi;
2954       current_space->local_remaining -= cnzi;
2955 
2956       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2957       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
2958 
2959       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2960       /*        For now, we will recompute what is needed. */
2961       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
2962     }
2963   }
2964   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2965   /* Allocate space for cj, initialize cj, and */
2966   /* destroy list of free space and other temporary array(s) */
2967   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
2968   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
2969   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
2970 
2971   /* Allocate space for ca */
2972   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
2973 
2974   /* put together the new matrix */
2975   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
2976   PetscCall(MatSetBlockSize(C, pp->dof));
2977 
2978   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2979   /* Since these are PETSc arrays, change flags to free them as necessary. */
2980   c          = (Mat_SeqAIJ *)(C->data);
2981   c->free_a  = PETSC_TRUE;
2982   c->free_ij = PETSC_TRUE;
2983   c->nonew   = 0;
2984 
2985   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2986   C->ops->productnumeric = MatProductNumeric_PtAP;
2987 
2988   /* Clean up. */
2989   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
2990   PetscFunctionReturn(PETSC_SUCCESS);
2991 }
2992 
2993 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
2994 {
2995   /* This routine requires testing -- first draft only */
2996   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
2997   Mat              P  = pp->AIJ;
2998   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2999   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
3000   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
3001   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
3002   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
3003   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
3004   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
3005   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
3006   MatScalar       *ca = c->a, *caj, *apa;
3007 
3008   PetscFunctionBegin;
3009   /* Allocate temporary array for storage of one row of A*P */
3010   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
3011 
3012   /* Clear old values in C */
3013   PetscCall(PetscArrayzero(ca, ci[cm]));
3014 
3015   for (i = 0; i < am; i++) {
3016     /* Form sparse row of A*P */
3017     anzi  = ai[i + 1] - ai[i];
3018     apnzj = 0;
3019     for (j = 0; j < anzi; j++) {
3020       /* Get offset within block of P */
3021       pshift = *aj % ppdof;
3022       /* Get block row of P */
3023       prow = *aj++ / ppdof; /* integer division */
3024       pnzj = pi[prow + 1] - pi[prow];
3025       pjj  = pj + pi[prow];
3026       paj  = pa + pi[prow];
3027       for (k = 0; k < pnzj; k++) {
3028         poffset = pjj[k] * ppdof + pshift;
3029         if (!apjdense[poffset]) {
3030           apjdense[poffset] = -1;
3031           apj[apnzj++]      = poffset;
3032         }
3033         apa[poffset] += (*aa) * paj[k];
3034       }
3035       PetscCall(PetscLogFlops(2.0 * pnzj));
3036       aa++;
3037     }
3038 
3039     /* Sort the j index array for quick sparse axpy. */
3040     /* Note: a array does not need sorting as it is in dense storage locations. */
3041     PetscCall(PetscSortInt(apnzj, apj));
3042 
3043     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3044     prow    = i / ppdof; /* integer division */
3045     pshift  = i % ppdof;
3046     poffset = pi[prow];
3047     pnzi    = pi[prow + 1] - poffset;
3048     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3049     pJ = pj + poffset;
3050     pA = pa + poffset;
3051     for (j = 0; j < pnzi; j++) {
3052       crow = (*pJ) * ppdof + pshift;
3053       cjj  = cj + ci[crow];
3054       caj  = ca + ci[crow];
3055       pJ++;
3056       /* Perform sparse axpy operation.  Note cjj includes apj. */
3057       for (k = 0, nextap = 0; nextap < apnzj; k++) {
3058         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
3059       }
3060       PetscCall(PetscLogFlops(2.0 * apnzj));
3061       pA++;
3062     }
3063 
3064     /* Zero the current row info for A*P */
3065     for (j = 0; j < apnzj; j++) {
3066       apa[apj[j]]      = 0.;
3067       apjdense[apj[j]] = 0;
3068     }
3069   }
3070 
3071   /* Assemble the final matrix and clean up */
3072   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3073   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3074   PetscCall(PetscFree3(apa, apj, apjdense));
3075   PetscFunctionReturn(PETSC_SUCCESS);
3076 }
3077 
3078 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3079 {
3080   Mat_Product *product = C->product;
3081   Mat          A = product->A, P = product->B;
3082 
3083   PetscFunctionBegin;
3084   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
3085   PetscFunctionReturn(PETSC_SUCCESS);
3086 }
3087 
3088 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C)
3089 {
3090   PetscFunctionBegin;
3091   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3092 }
3093 
3094 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C)
3095 {
3096   PetscFunctionBegin;
3097   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3098 }
3099 
3100 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3101 
3102 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
3103 {
3104   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3105 
3106   PetscFunctionBegin;
3107 
3108   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
3109   PetscFunctionReturn(PETSC_SUCCESS);
3110 }
3111 
3112 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3113 
3114 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
3115 {
3116   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3117 
3118   PetscFunctionBegin;
3119   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
3120   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3121   PetscFunctionReturn(PETSC_SUCCESS);
3122 }
3123 
3124 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3125 
3126 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
3127 {
3128   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3129 
3130   PetscFunctionBegin;
3131 
3132   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3137 
3138 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
3139 {
3140   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3141 
3142   PetscFunctionBegin;
3143 
3144   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
3145   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3146   PetscFunctionReturn(PETSC_SUCCESS);
3147 }
3148 
3149 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3150 {
3151   Mat_Product *product = C->product;
3152   Mat          A = product->A, P = product->B;
3153   PetscBool    flg;
3154 
3155   PetscFunctionBegin;
3156   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
3157   if (flg) {
3158     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
3159     C->ops->productnumeric = MatProductNumeric_PtAP;
3160     PetscFunctionReturn(PETSC_SUCCESS);
3161   }
3162 
3163   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
3164   if (flg) {
3165     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
3166     C->ops->productnumeric = MatProductNumeric_PtAP;
3167     PetscFunctionReturn(PETSC_SUCCESS);
3168   }
3169 
3170   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3171 }
3172 
3173 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3174 {
3175   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
3176   Mat          a   = b->AIJ, B;
3177   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
3178   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3179   PetscInt    *cols;
3180   PetscScalar *vals;
3181 
3182   PetscFunctionBegin;
3183   PetscCall(MatGetSize(a, &m, &n));
3184   PetscCall(PetscMalloc1(dof * m, &ilen));
3185   for (i = 0; i < m; i++) {
3186     nmax = PetscMax(nmax, aij->ilen[i]);
3187     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
3188   }
3189   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
3190   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
3191   PetscCall(MatSetType(B, newtype));
3192   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
3193   PetscCall(PetscFree(ilen));
3194   PetscCall(PetscMalloc1(nmax, &icols));
3195   ii = 0;
3196   for (i = 0; i < m; i++) {
3197     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3198     for (j = 0; j < dof; j++) {
3199       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
3200       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3201       ii++;
3202     }
3203     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
3204   }
3205   PetscCall(PetscFree(icols));
3206   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3207   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3208 
3209   if (reuse == MAT_INPLACE_MATRIX) {
3210     PetscCall(MatHeaderReplace(A, &B));
3211   } else {
3212     *newmat = B;
3213   }
3214   PetscFunctionReturn(PETSC_SUCCESS);
3215 }
3216 
3217 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3218 
3219 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3220 {
3221   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
3222   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
3223   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
3224   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
3225   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
3226   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
3227   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
3228   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
3229   PetscInt     rstart, cstart, *garray, ii, k;
3230   PetscScalar *vals, *ovals;
3231 
3232   PetscFunctionBegin;
3233   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
3234   for (i = 0; i < A->rmap->n / dof; i++) {
3235     nmax  = PetscMax(nmax, AIJ->ilen[i]);
3236     onmax = PetscMax(onmax, OAIJ->ilen[i]);
3237     for (j = 0; j < dof; j++) {
3238       dnz[dof * i + j] = AIJ->ilen[i];
3239       onz[dof * i + j] = OAIJ->ilen[i];
3240     }
3241   }
3242   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3243   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3244   PetscCall(MatSetType(B, newtype));
3245   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
3246   PetscCall(MatSetBlockSize(B, dof));
3247   PetscCall(PetscFree2(dnz, onz));
3248 
3249   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
3250   rstart = dof * maij->A->rmap->rstart;
3251   cstart = dof * maij->A->cmap->rstart;
3252   garray = mpiaij->garray;
3253 
3254   ii = rstart;
3255   for (i = 0; i < A->rmap->n / dof; i++) {
3256     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3257     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3258     for (j = 0; j < dof; j++) {
3259       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3260       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
3261       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
3262       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
3263       ii++;
3264     }
3265     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
3266     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
3267   }
3268   PetscCall(PetscFree2(icols, oicols));
3269 
3270   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3271   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3272 
3273   if (reuse == MAT_INPLACE_MATRIX) {
3274     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3275     ((PetscObject)A)->refct = 1;
3276 
3277     PetscCall(MatHeaderReplace(A, &B));
3278 
3279     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3280   } else {
3281     *newmat = B;
3282   }
3283   PetscFunctionReturn(PETSC_SUCCESS);
3284 }
3285 
3286 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
3287 {
3288   Mat A;
3289 
3290   PetscFunctionBegin;
3291   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3292   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
3293   PetscCall(MatDestroy(&A));
3294   PetscFunctionReturn(PETSC_SUCCESS);
3295 }
3296 
3297 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
3298 {
3299   Mat A;
3300 
3301   PetscFunctionBegin;
3302   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
3303   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
3304   PetscCall(MatDestroy(&A));
3305   PetscFunctionReturn(PETSC_SUCCESS);
3306 }
3307 
3308 /* ---------------------------------------------------------------------------------- */
3309 /*@
3310   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3311   operations for multicomponent problems.  It interpolates each component the same
3312   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
3313   and `MATMPIAIJ` for distributed matrices.
3314 
3315   Collective
3316 
3317   Input Parameters:
3318 + A - the `MATAIJ` matrix describing the action on blocks
3319 - dof - the block size (number of components per node)
3320 
3321   Output Parameter:
3322 . maij - the new `MATMAIJ` matrix
3323 
3324   Operations provided:
3325 .vb
3326     MatMult()
3327     MatMultTranspose()
3328     MatMultAdd()
3329     MatMultTransposeAdd()
3330     MatView()
3331 .ve
3332 
3333   Level: advanced
3334 
3335 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3336 @*/
3337 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
3338 {
3339   PetscInt  n;
3340   Mat       B;
3341   PetscBool flg;
3342 #if defined(PETSC_HAVE_CUDA)
3343   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3344   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3345 #endif
3346 
3347   PetscFunctionBegin;
3348   dof = PetscAbs(dof);
3349   PetscCall(PetscObjectReference((PetscObject)A));
3350 
3351   if (dof == 1) *maij = A;
3352   else {
3353     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3354     /* propagate vec type */
3355     PetscCall(MatSetVecType(B, A->defaultvectype));
3356     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
3357     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
3358     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
3359     PetscCall(PetscLayoutSetUp(B->rmap));
3360     PetscCall(PetscLayoutSetUp(B->cmap));
3361 
3362     B->assembled = PETSC_TRUE;
3363 
3364     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
3365     if (flg) {
3366       Mat_SeqMAIJ *b;
3367 
3368       PetscCall(MatSetType(B, MATSEQMAIJ));
3369 
3370       B->ops->setup   = NULL;
3371       B->ops->destroy = MatDestroy_SeqMAIJ;
3372       B->ops->view    = MatView_SeqMAIJ;
3373 
3374       b      = (Mat_SeqMAIJ *)B->data;
3375       b->dof = dof;
3376       b->AIJ = A;
3377 
3378       if (dof == 2) {
3379         B->ops->mult             = MatMult_SeqMAIJ_2;
3380         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3381         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3382         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3383       } else if (dof == 3) {
3384         B->ops->mult             = MatMult_SeqMAIJ_3;
3385         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3386         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3387         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3388       } else if (dof == 4) {
3389         B->ops->mult             = MatMult_SeqMAIJ_4;
3390         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3391         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3392         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3393       } else if (dof == 5) {
3394         B->ops->mult             = MatMult_SeqMAIJ_5;
3395         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3396         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3397         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3398       } else if (dof == 6) {
3399         B->ops->mult             = MatMult_SeqMAIJ_6;
3400         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3401         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3402         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3403       } else if (dof == 7) {
3404         B->ops->mult             = MatMult_SeqMAIJ_7;
3405         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3406         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3407         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3408       } else if (dof == 8) {
3409         B->ops->mult             = MatMult_SeqMAIJ_8;
3410         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3411         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3412         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3413       } else if (dof == 9) {
3414         B->ops->mult             = MatMult_SeqMAIJ_9;
3415         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3416         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3417         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3418       } else if (dof == 10) {
3419         B->ops->mult             = MatMult_SeqMAIJ_10;
3420         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3421         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3422         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3423       } else if (dof == 11) {
3424         B->ops->mult             = MatMult_SeqMAIJ_11;
3425         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3426         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3427         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3428       } else if (dof == 16) {
3429         B->ops->mult             = MatMult_SeqMAIJ_16;
3430         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3431         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3432         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3433       } else if (dof == 18) {
3434         B->ops->mult             = MatMult_SeqMAIJ_18;
3435         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3436         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3437         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3438       } else {
3439         B->ops->mult             = MatMult_SeqMAIJ_N;
3440         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3441         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3442         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3443       }
3444 #if defined(PETSC_HAVE_CUDA)
3445       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
3446 #endif
3447       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
3448       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3449     } else {
3450       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3451       Mat_MPIMAIJ *b;
3452       IS           from, to;
3453       Vec          gvec;
3454 
3455       PetscCall(MatSetType(B, MATMPIMAIJ));
3456 
3457       B->ops->setup   = NULL;
3458       B->ops->destroy = MatDestroy_MPIMAIJ;
3459       B->ops->view    = MatView_MPIMAIJ;
3460 
3461       b      = (Mat_MPIMAIJ *)B->data;
3462       b->dof = dof;
3463       b->A   = A;
3464 
3465       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
3466       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
3467 
3468       PetscCall(VecGetSize(mpiaij->lvec, &n));
3469       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
3470       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
3471       PetscCall(VecSetBlockSize(b->w, dof));
3472       PetscCall(VecSetType(b->w, VECSEQ));
3473 
3474       /* create two temporary Index sets for build scatter gather */
3475       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
3476       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
3477 
3478       /* create temporary global vector to generate scatter context */
3479       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
3480 
3481       /* generate the scatter context */
3482       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
3483 
3484       PetscCall(ISDestroy(&from));
3485       PetscCall(ISDestroy(&to));
3486       PetscCall(VecDestroy(&gvec));
3487 
3488       B->ops->mult             = MatMult_MPIMAIJ_dof;
3489       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3490       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3491       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3492 
3493 #if defined(PETSC_HAVE_CUDA)
3494       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
3495 #endif
3496       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
3497       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3498     }
3499     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3500     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3501     PetscCall(MatSetUp(B));
3502 #if defined(PETSC_HAVE_CUDA)
3503     /* temporary until we have CUDA implementation of MAIJ */
3504     {
3505       PetscBool flg;
3506       if (convert) {
3507         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
3508         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
3509       }
3510     }
3511 #endif
3512     *maij = B;
3513   }
3514   PetscFunctionReturn(PETSC_SUCCESS);
3515 }
3516