xref: /petsc/src/mat/impls/maij/maij.c (revision ae51ba108530e1a8d2aeb8bc3771a24c2fcd801e)
1 
2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/utils/freespace.h>
4 
5 /*@
6    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7 
8    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9 
10    Input Parameter:
11 .  A - the `MATMAIJ` matrix
12 
13    Output Parameter:
14 .  B - the `MATAIJ` matrix
15 
16    Level: advanced
17 
18    Note:
19     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20 
21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22 @*/
23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24 {
25   PetscBool ismpimaij, isseqmaij;
26 
27   PetscFunctionBegin;
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
29   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
30   if (ismpimaij) {
31     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32 
33     *B = b->A;
34   } else if (isseqmaij) {
35     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36 
37     *B = b->AIJ;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 /*@
45    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46 
47    Logically Collective
48 
49    Input Parameters:
50 +  A - the `MATMAIJ` matrix
51 -  dof - the block size for the new matrix
52 
53    Output Parameter:
54 .  B - the new `MATMAIJ` matrix
55 
56    Level: advanced
57 
58 .seealso: `MATMAIJ`, `MatCreateMAIJ()`
59 @*/
60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
61 {
62   Mat Aij = NULL;
63 
64   PetscFunctionBegin;
65   PetscValidLogicalCollectiveInt(A, dof, 2);
66   PetscCall(MatMAIJGetAIJ(A, &Aij));
67   PetscCall(MatCreateMAIJ(Aij, dof, B));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
72 {
73   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74 
75   PetscFunctionBegin;
76   PetscCall(MatDestroy(&b->AIJ));
77   PetscCall(PetscFree(A->data));
78   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatSetUp_MAIJ(Mat A)
85 {
86   PetscFunctionBegin;
87   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
88 }
89 
90 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
91 {
92   Mat B;
93 
94   PetscFunctionBegin;
95   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
96   PetscCall(MatView(B, viewer));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
107   PetscCall(MatView(B, viewer));
108   PetscCall(MatDestroy(&B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113 {
114   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatDestroy(&b->AIJ));
118   PetscCall(MatDestroy(&b->OAIJ));
119   PetscCall(MatDestroy(&b->A));
120   PetscCall(VecScatterDestroy(&b->ctx));
121   PetscCall(VecDestroy(&b->w));
122   PetscCall(PetscFree(A->data));
123   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
124   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
125   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
126   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 /*MC
131   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
132   multicomponent problems, interpolating or restricting each component the same way independently.
133   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134 
135   Operations provided:
136 .vb
137     MatMult()
138     MatMultTranspose()
139     MatMultAdd()
140     MatMultTransposeAdd()
141 .ve
142 
143   Level: advanced
144 
145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
146 M*/
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149 {
150   Mat_MPIMAIJ *b;
151   PetscMPIInt  size;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscNew(&b));
155   A->data = (void *)b;
156 
157   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158 
159   A->ops->setup = MatSetUp_MAIJ;
160 
161   b->AIJ  = NULL;
162   b->dof  = 0;
163   b->OAIJ = NULL;
164   b->ctx  = NULL;
165   b->w    = NULL;
166   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
167   if (size == 1) {
168     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
169   } else {
170     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
171   }
172   A->preallocated = PETSC_TRUE;
173   A->assembled    = PETSC_TRUE;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 #if PetscHasAttribute(always_inline)
178   #define PETSC_FORCE_INLINE __attribute__((always_inline))
179 #else
180   #define PETSC_FORCE_INLINE
181 #endif
182 enum {
183   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
184 };
185 
186 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
187 // keyword into account for these...
188 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
189 {
190   const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
191   const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
192   const Mat          baij       = b->AIJ;
193   const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
194   const PetscInt     m          = baij->rmap->n;
195   const PetscInt     nz         = a->nz;
196   const PetscInt    *idx        = a->j;
197   const PetscInt    *ii         = a->i;
198   const PetscScalar *v          = a->a;
199   PetscInt           nonzerorow = 0;
200   const PetscScalar *x;
201   PetscScalar       *z;
202 
203   PetscFunctionBegin;
204   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
205   if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
206   PetscCall(VecGetArrayRead(xx, &x));
207   if (mult_add) {
208     PetscCall(VecGetArray(zz, &z));
209   } else {
210     PetscCall(VecGetArrayWrite(zz, &z));
211   }
212 
213   for (PetscInt i = 0; i < m; ++i) {
214     PetscInt       jrow = ii[i];
215     const PetscInt n    = ii[i + 1] - jrow;
216     // leave a line so clang-format does not align these decls
217     PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
218 
219     nonzerorow += n > 0;
220     for (PetscInt j = 0; j < n; ++j, ++jrow) {
221       const PetscScalar v_jrow     = v[jrow];
222       const PetscInt    N_idx_jrow = N * idx[jrow];
223 
224 #pragma unroll
225       for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
226     }
227 
228 #pragma unroll
229     for (int k = 0; k < N; ++k) {
230       const PetscInt z_idx = N * i + k;
231 
232       if (mult_add) {
233         z[z_idx] += sum[k];
234       } else {
235         z[z_idx] = sum[k];
236       }
237     }
238   }
239   PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
240   PetscCall(VecRestoreArrayRead(xx, &x));
241   if (mult_add) {
242     PetscCall(VecRestoreArray(zz, &z));
243   } else {
244     PetscCall(VecRestoreArrayWrite(zz, &z));
245   }
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
249 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
250 {
251   const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
252   const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
253   const Mat          baij     = b->AIJ;
254   const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
255   const PetscInt     m        = baij->rmap->n;
256   const PetscInt     nz       = a->nz;
257   const PetscInt    *a_j      = a->j;
258   const PetscInt    *a_i      = a->i;
259   const PetscScalar *a_a      = a->a;
260   const PetscScalar *x;
261   PetscScalar       *z;
262 
263   PetscFunctionBegin;
264   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
265   if (mult_add) {
266     if (yy != zz) PetscCall(VecCopy(yy, zz));
267   } else {
268     PetscCall(VecSet(zz, 0.0));
269   }
270   PetscCall(VecGetArrayRead(xx, &x));
271   PetscCall(VecGetArray(zz, &z));
272 
273   for (PetscInt i = 0; i < m; i++) {
274     const PetscInt     a_ii = a_i[i];
275     const PetscInt    *idx  = a_j + a_ii;
276     const PetscScalar *v    = a_a + a_ii;
277     const PetscInt     n    = a_i[i + 1] - a_ii;
278     PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
279 
280 #pragma unroll
281     for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
282     for (PetscInt j = 0; j < n; ++j) {
283       const PetscInt    N_idx_j = N * idx[j];
284       const PetscScalar v_j     = v[j];
285 
286 #pragma unroll
287       for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
288     }
289   }
290 
291   PetscCall(PetscLogFlops(2 * N * nz));
292   PetscCall(VecRestoreArrayRead(xx, &x));
293   PetscCall(VecRestoreArray(zz, &z));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 /* --------------------------------------------------------------------------------------*/
298 
299 static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
300 {
301   PetscFunctionBegin;
302   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2));
303   PetscFunctionReturn(PETSC_SUCCESS);
304 }
305 
306 static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
307 {
308   PetscFunctionBegin;
309   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2));
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
314 {
315   PetscFunctionBegin;
316   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2));
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
321 {
322   PetscFunctionBegin;
323   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 /* --------------------------------------------------------------------------------------*/
328 
329 static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
330 {
331   PetscFunctionBegin;
332   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
337 {
338   PetscFunctionBegin;
339   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3));
340   PetscFunctionReturn(PETSC_SUCCESS);
341 }
342 
343 static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
344 {
345   PetscFunctionBegin;
346   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3));
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
351 {
352   PetscFunctionBegin;
353   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 3));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 /* ------------------------------------------------------------------------------*/
358 
359 static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
360 {
361   PetscFunctionBegin;
362   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
366 static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
367 {
368   PetscFunctionBegin;
369   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
374 {
375   PetscFunctionBegin;
376   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
381 {
382   PetscFunctionBegin;
383   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 /* ------------------------------------------------------------------------------*/
388 
389 static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
390 {
391   PetscFunctionBegin;
392   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
404 {
405   PetscFunctionBegin;
406   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
410 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
411 {
412   PetscFunctionBegin;
413   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 /* ------------------------------------------------------------------------------*/
418 
419 static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
420 {
421   PetscFunctionBegin;
422   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
427 {
428   PetscFunctionBegin;
429   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
434 {
435   PetscFunctionBegin;
436   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
440 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
441 {
442   PetscFunctionBegin;
443   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 /* ------------------------------------------------------------------------------*/
448 
449 static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
450 {
451   PetscFunctionBegin;
452   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
453   PetscFunctionReturn(PETSC_SUCCESS);
454 }
455 
456 static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
457 {
458   PetscFunctionBegin;
459   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
460   PetscFunctionReturn(PETSC_SUCCESS);
461 }
462 
463 static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
464 {
465   PetscFunctionBegin;
466   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
467   PetscFunctionReturn(PETSC_SUCCESS);
468 }
469 
470 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
471 {
472   PetscFunctionBegin;
473   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /* ------------------------------------------------------------------------------*/
478 
479 static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
480 {
481   PetscFunctionBegin;
482   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
483   PetscFunctionReturn(PETSC_SUCCESS);
484 }
485 
486 static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
487 {
488   PetscFunctionBegin;
489   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
490   PetscFunctionReturn(PETSC_SUCCESS);
491 }
492 
493 static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
494 {
495   PetscFunctionBegin;
496   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
497   PetscFunctionReturn(PETSC_SUCCESS);
498 }
499 
500 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
501 {
502   PetscFunctionBegin;
503   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 /* ------------------------------------------------------------------------------*/
508 
509 static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
510 {
511   PetscFunctionBegin;
512   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
513   PetscFunctionReturn(PETSC_SUCCESS);
514 }
515 
516 static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
517 {
518   PetscFunctionBegin;
519   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
523 static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
524 {
525   PetscFunctionBegin;
526   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   PetscFunctionBegin;
533   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 /* ------------------------------------------------------------------------------*/
538 
539 static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
540 {
541   PetscFunctionBegin;
542   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
543   PetscFunctionReturn(PETSC_SUCCESS);
544 }
545 
546 static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
547 {
548   PetscFunctionBegin;
549   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
550   PetscFunctionReturn(PETSC_SUCCESS);
551 }
552 
553 static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
554 {
555   PetscFunctionBegin;
556   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
561 {
562   PetscFunctionBegin;
563   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
564   PetscFunctionReturn(PETSC_SUCCESS);
565 }
566 
567 /*--------------------------------------------------------------------------------------------*/
568 
569 static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
570 {
571   PetscFunctionBegin;
572   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
573   PetscFunctionReturn(PETSC_SUCCESS);
574 }
575 
576 static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
577 {
578   PetscFunctionBegin;
579   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
580   PetscFunctionReturn(PETSC_SUCCESS);
581 }
582 
583 static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
584 {
585   PetscFunctionBegin;
586   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
587   PetscFunctionReturn(PETSC_SUCCESS);
588 }
589 
590 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
591 {
592   PetscFunctionBegin;
593   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
594   PetscFunctionReturn(PETSC_SUCCESS);
595 }
596 
597 /*--------------------------------------------------------------------------------------------*/
598 
599 static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
600 {
601   PetscFunctionBegin;
602   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
603   PetscFunctionReturn(PETSC_SUCCESS);
604 }
605 
606 static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
607 {
608   PetscFunctionBegin;
609   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
614 {
615   PetscFunctionBegin;
616   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
621 {
622   PetscFunctionBegin;
623   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 /*--------------------------------------------------------------------------------------------*/
628 
629 static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
630 {
631   PetscFunctionBegin;
632   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 
636 static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
637 {
638   PetscFunctionBegin;
639   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
640   PetscFunctionReturn(PETSC_SUCCESS);
641 }
642 
643 static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
644 {
645   PetscFunctionBegin;
646   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
651 {
652   PetscFunctionBegin;
653   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
654   PetscFunctionReturn(PETSC_SUCCESS);
655 }
656 
657 /*--------------------------------------------------------------------------------------------*/
658 
659 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
660 {
661   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
662   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
663   const PetscScalar *x, *v;
664   PetscScalar       *y, *sums;
665   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
666   PetscInt           n, i, jrow, j, dof = b->dof, k;
667 
668   PetscFunctionBegin;
669   PetscCall(VecGetArrayRead(xx, &x));
670   PetscCall(VecSet(yy, 0.0));
671   PetscCall(VecGetArray(yy, &y));
672   idx = a->j;
673   v   = a->a;
674   ii  = a->i;
675 
676   for (i = 0; i < m; i++) {
677     jrow = ii[i];
678     n    = ii[i + 1] - jrow;
679     sums = y + dof * i;
680     for (j = 0; j < n; j++) {
681       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
682       jrow++;
683     }
684   }
685 
686   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
687   PetscCall(VecRestoreArrayRead(xx, &x));
688   PetscCall(VecRestoreArray(yy, &y));
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
692 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
693 {
694   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
695   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
696   const PetscScalar *x, *v;
697   PetscScalar       *y, *sums;
698   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
699   PetscInt           n, i, jrow, j, dof = b->dof, k;
700 
701   PetscFunctionBegin;
702   if (yy != zz) PetscCall(VecCopy(yy, zz));
703   PetscCall(VecGetArrayRead(xx, &x));
704   PetscCall(VecGetArray(zz, &y));
705   idx = a->j;
706   v   = a->a;
707   ii  = a->i;
708 
709   for (i = 0; i < m; i++) {
710     jrow = ii[i];
711     n    = ii[i + 1] - jrow;
712     sums = y + dof * i;
713     for (j = 0; j < n; j++) {
714       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
715       jrow++;
716     }
717   }
718 
719   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
720   PetscCall(VecRestoreArrayRead(xx, &x));
721   PetscCall(VecRestoreArray(zz, &y));
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
724 
725 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
726 {
727   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
728   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
729   const PetscScalar *x, *v, *alpha;
730   PetscScalar       *y;
731   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
732   PetscInt           n, i, k;
733 
734   PetscFunctionBegin;
735   PetscCall(VecGetArrayRead(xx, &x));
736   PetscCall(VecSet(yy, 0.0));
737   PetscCall(VecGetArray(yy, &y));
738   for (i = 0; i < m; i++) {
739     idx   = a->j + a->i[i];
740     v     = a->a + a->i[i];
741     n     = a->i[i + 1] - a->i[i];
742     alpha = x + dof * i;
743     while (n-- > 0) {
744       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
745       idx++;
746       v++;
747     }
748   }
749   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
750   PetscCall(VecRestoreArrayRead(xx, &x));
751   PetscCall(VecRestoreArray(yy, &y));
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
755 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
756 {
757   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
758   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
759   const PetscScalar *x, *v, *alpha;
760   PetscScalar       *y;
761   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
762   PetscInt           n, i, k;
763 
764   PetscFunctionBegin;
765   if (yy != zz) PetscCall(VecCopy(yy, zz));
766   PetscCall(VecGetArrayRead(xx, &x));
767   PetscCall(VecGetArray(zz, &y));
768   for (i = 0; i < m; i++) {
769     idx   = a->j + a->i[i];
770     v     = a->a + a->i[i];
771     n     = a->i[i + 1] - a->i[i];
772     alpha = x + dof * i;
773     while (n-- > 0) {
774       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
775       idx++;
776       v++;
777     }
778   }
779   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
780   PetscCall(VecRestoreArrayRead(xx, &x));
781   PetscCall(VecRestoreArray(zz, &y));
782   PetscFunctionReturn(PETSC_SUCCESS);
783 }
784 
785 /*--------------------------------------------------------------------------------------------*/
786 
787 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
788 {
789   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
790 
791   PetscFunctionBegin;
792   /* start the scatter */
793   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
794   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
795   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
796   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
797   PetscFunctionReturn(PETSC_SUCCESS);
798 }
799 
800 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
801 {
802   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
803 
804   PetscFunctionBegin;
805   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
806   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
807   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
808   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
812 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
813 {
814   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
815 
816   PetscFunctionBegin;
817   /* start the scatter */
818   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
819   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
820   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
821   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
822   PetscFunctionReturn(PETSC_SUCCESS);
823 }
824 
825 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
826 {
827   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
828 
829   PetscFunctionBegin;
830   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
831   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
832   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
833   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
834   PetscFunctionReturn(PETSC_SUCCESS);
835 }
836 
837 /* ----------------------------------------------------------------*/
838 
839 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
840 {
841   Mat_Product *product = C->product;
842 
843   PetscFunctionBegin;
844   if (product->type == MATPRODUCT_PtAP) {
845     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
846   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
851 {
852   Mat_Product *product = C->product;
853   PetscBool    flg     = PETSC_FALSE;
854   Mat          A = product->A, P = product->B;
855   PetscInt     alg = 1; /* set default algorithm */
856 #if !defined(PETSC_HAVE_HYPRE)
857   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
858   PetscInt    nalg        = 4;
859 #else
860   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
861   PetscInt    nalg        = 5;
862 #endif
863 
864   PetscFunctionBegin;
865   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
866 
867   /* PtAP */
868   /* Check matrix local sizes */
869   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
870              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
871   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
872              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
873 
874   /* Set the default algorithm */
875   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
876   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
877 
878   /* Get runtime option */
879   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
880   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
881   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
882   PetscOptionsEnd();
883 
884   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
885   if (flg) {
886     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
887     PetscFunctionReturn(PETSC_SUCCESS);
888   }
889 
890   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
891   if (flg) {
892     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
893     PetscFunctionReturn(PETSC_SUCCESS);
894   }
895 
896   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
897   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
898   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
899   PetscCall(MatProductSetFromOptions(C));
900   PetscFunctionReturn(PETSC_SUCCESS);
901 }
902 
903 /* ----------------------------------------------------------------*/
904 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
905 {
906   /* This routine requires testing -- first draft only */
907   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
908   Mat              P  = pp->AIJ;
909   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
910   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
911   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
912   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
913   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
914   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
915   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
916   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
917   MatScalar       *ca = c->a, *caj, *apa;
918 
919   PetscFunctionBegin;
920   /* Allocate temporary array for storage of one row of A*P */
921   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
922 
923   /* Clear old values in C */
924   PetscCall(PetscArrayzero(ca, ci[cm]));
925 
926   for (i = 0; i < am; i++) {
927     /* Form sparse row of A*P */
928     anzi  = ai[i + 1] - ai[i];
929     apnzj = 0;
930     for (j = 0; j < anzi; j++) {
931       /* Get offset within block of P */
932       pshift = *aj % ppdof;
933       /* Get block row of P */
934       prow = *aj++ / ppdof; /* integer division */
935       pnzj = pi[prow + 1] - pi[prow];
936       pjj  = pj + pi[prow];
937       paj  = pa + pi[prow];
938       for (k = 0; k < pnzj; k++) {
939         poffset = pjj[k] * ppdof + pshift;
940         if (!apjdense[poffset]) {
941           apjdense[poffset] = -1;
942           apj[apnzj++]      = poffset;
943         }
944         apa[poffset] += (*aa) * paj[k];
945       }
946       PetscCall(PetscLogFlops(2.0 * pnzj));
947       aa++;
948     }
949 
950     /* Sort the j index array for quick sparse axpy. */
951     /* Note: a array does not need sorting as it is in dense storage locations. */
952     PetscCall(PetscSortInt(apnzj, apj));
953 
954     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
955     prow    = i / ppdof; /* integer division */
956     pshift  = i % ppdof;
957     poffset = pi[prow];
958     pnzi    = pi[prow + 1] - poffset;
959     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
960     pJ = pj + poffset;
961     pA = pa + poffset;
962     for (j = 0; j < pnzi; j++) {
963       crow = (*pJ) * ppdof + pshift;
964       cjj  = cj + ci[crow];
965       caj  = ca + ci[crow];
966       pJ++;
967       /* Perform sparse axpy operation.  Note cjj includes apj. */
968       for (k = 0, nextap = 0; nextap < apnzj; k++) {
969         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
970       }
971       PetscCall(PetscLogFlops(2.0 * apnzj));
972       pA++;
973     }
974 
975     /* Zero the current row info for A*P */
976     for (j = 0; j < apnzj; j++) {
977       apa[apj[j]]      = 0.;
978       apjdense[apj[j]] = 0;
979     }
980   }
981 
982   /* Assemble the final matrix and clean up */
983   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
984   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
985   PetscCall(PetscFree3(apa, apj, apjdense));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
990 {
991   PetscFreeSpaceList free_space = NULL, current_space = NULL;
992   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
993   Mat                P  = pp->AIJ;
994   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
995   PetscInt          *pti, *ptj, *ptJ;
996   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
997   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
998   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
999   MatScalar         *ca;
1000   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
1001 
1002   PetscFunctionBegin;
1003   /* Get ij structure of P^T */
1004   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
1005 
1006   cn = pn * ppdof;
1007   /* Allocate ci array, arrays for fill computation and */
1008   /* free space for accumulating nonzero column info */
1009   PetscCall(PetscMalloc1(cn + 1, &ci));
1010   ci[0] = 0;
1011 
1012   /* Work arrays for rows of P^T*A */
1013   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
1014   PetscCall(PetscArrayzero(ptadenserow, an));
1015   PetscCall(PetscArrayzero(denserow, cn));
1016 
1017   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1018   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1019   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
1020   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
1021   current_space = free_space;
1022 
1023   /* Determine symbolic info for each row of C: */
1024   for (i = 0; i < pn; i++) {
1025     ptnzi = pti[i + 1] - pti[i];
1026     ptJ   = ptj + pti[i];
1027     for (dof = 0; dof < ppdof; dof++) {
1028       ptanzi = 0;
1029       /* Determine symbolic row of PtA: */
1030       for (j = 0; j < ptnzi; j++) {
1031         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
1032         arow = ptJ[j] * ppdof + dof;
1033         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
1034         anzj = ai[arow + 1] - ai[arow];
1035         ajj  = aj + ai[arow];
1036         for (k = 0; k < anzj; k++) {
1037           if (!ptadenserow[ajj[k]]) {
1038             ptadenserow[ajj[k]]    = -1;
1039             ptasparserow[ptanzi++] = ajj[k];
1040           }
1041         }
1042       }
1043       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1044       ptaj = ptasparserow;
1045       cnzi = 0;
1046       for (j = 0; j < ptanzi; j++) {
1047         /* Get offset within block of P */
1048         pshift = *ptaj % ppdof;
1049         /* Get block row of P */
1050         prow = (*ptaj++) / ppdof; /* integer division */
1051         /* P has same number of nonzeros per row as the compressed form */
1052         pnzj = pi[prow + 1] - pi[prow];
1053         pjj  = pj + pi[prow];
1054         for (k = 0; k < pnzj; k++) {
1055           /* Locations in C are shifted by the offset within the block */
1056           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
1057           if (!denserow[pjj[k] * ppdof + pshift]) {
1058             denserow[pjj[k] * ppdof + pshift] = -1;
1059             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
1060           }
1061         }
1062       }
1063 
1064       /* sort sparserow */
1065       PetscCall(PetscSortInt(cnzi, sparserow));
1066 
1067       /* If free space is not available, make more free space */
1068       /* Double the amount of total space in the list */
1069       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
1070 
1071       /* Copy data into free space, and zero out denserows */
1072       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
1073 
1074       current_space->array += cnzi;
1075       current_space->local_used += cnzi;
1076       current_space->local_remaining -= cnzi;
1077 
1078       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
1079       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
1080 
1081       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1082       /*        For now, we will recompute what is needed. */
1083       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
1084     }
1085   }
1086   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1087   /* Allocate space for cj, initialize cj, and */
1088   /* destroy list of free space and other temporary array(s) */
1089   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
1090   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
1091   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
1092 
1093   /* Allocate space for ca */
1094   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
1095 
1096   /* put together the new matrix */
1097   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
1098   PetscCall(MatSetBlockSize(C, pp->dof));
1099 
1100   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1101   /* Since these are PETSc arrays, change flags to free them as necessary. */
1102   c          = (Mat_SeqAIJ *)(C->data);
1103   c->free_a  = PETSC_TRUE;
1104   c->free_ij = PETSC_TRUE;
1105   c->nonew   = 0;
1106 
1107   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
1108   C->ops->productnumeric = MatProductNumeric_PtAP;
1109 
1110   /* Clean up. */
1111   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
1112   PetscFunctionReturn(PETSC_SUCCESS);
1113 }
1114 
1115 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
1116 {
1117   Mat_Product *product = C->product;
1118   Mat          A = product->A, P = product->B;
1119 
1120   PetscFunctionBegin;
1121   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
1126 
1127 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
1128 {
1129   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1130 
1131   PetscFunctionBegin;
1132 
1133   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
1138 
1139 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
1140 {
1141   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1142 
1143   PetscFunctionBegin;
1144   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
1145   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
1146   PetscFunctionReturn(PETSC_SUCCESS);
1147 }
1148 
1149 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
1150 
1151 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
1152 {
1153   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1154 
1155   PetscFunctionBegin;
1156 
1157   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
1162 
1163 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
1164 {
1165   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1166 
1167   PetscFunctionBegin;
1168 
1169   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
1170   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
1174 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
1175 {
1176   Mat_Product *product = C->product;
1177   Mat          A = product->A, P = product->B;
1178   PetscBool    flg;
1179 
1180   PetscFunctionBegin;
1181   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
1182   if (flg) {
1183     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
1184     C->ops->productnumeric = MatProductNumeric_PtAP;
1185     PetscFunctionReturn(PETSC_SUCCESS);
1186   }
1187 
1188   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
1189   if (flg) {
1190     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
1191     C->ops->productnumeric = MatProductNumeric_PtAP;
1192     PetscFunctionReturn(PETSC_SUCCESS);
1193   }
1194 
1195   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
1196 }
1197 
1198 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1199 {
1200   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
1201   Mat          a   = b->AIJ, B;
1202   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
1203   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
1204   PetscInt    *cols;
1205   PetscScalar *vals;
1206 
1207   PetscFunctionBegin;
1208   PetscCall(MatGetSize(a, &m, &n));
1209   PetscCall(PetscMalloc1(dof * m, &ilen));
1210   for (i = 0; i < m; i++) {
1211     nmax = PetscMax(nmax, aij->ilen[i]);
1212     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
1213   }
1214   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
1215   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
1216   PetscCall(MatSetType(B, newtype));
1217   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
1218   PetscCall(PetscFree(ilen));
1219   PetscCall(PetscMalloc1(nmax, &icols));
1220   ii = 0;
1221   for (i = 0; i < m; i++) {
1222     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
1223     for (j = 0; j < dof; j++) {
1224       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
1225       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
1226       ii++;
1227     }
1228     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
1229   }
1230   PetscCall(PetscFree(icols));
1231   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1232   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1233 
1234   if (reuse == MAT_INPLACE_MATRIX) {
1235     PetscCall(MatHeaderReplace(A, &B));
1236   } else {
1237     *newmat = B;
1238   }
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
1241 
1242 #include <../src/mat/impls/aij/mpi/mpiaij.h>
1243 
1244 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1245 {
1246   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
1247   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
1248   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
1249   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
1250   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
1251   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
1252   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
1253   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
1254   PetscInt     rstart, cstart, *garray, ii, k;
1255   PetscScalar *vals, *ovals;
1256 
1257   PetscFunctionBegin;
1258   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
1259   for (i = 0; i < A->rmap->n / dof; i++) {
1260     nmax  = PetscMax(nmax, AIJ->ilen[i]);
1261     onmax = PetscMax(onmax, OAIJ->ilen[i]);
1262     for (j = 0; j < dof; j++) {
1263       dnz[dof * i + j] = AIJ->ilen[i];
1264       onz[dof * i + j] = OAIJ->ilen[i];
1265     }
1266   }
1267   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1268   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1269   PetscCall(MatSetType(B, newtype));
1270   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
1271   PetscCall(MatSetBlockSize(B, dof));
1272   PetscCall(PetscFree2(dnz, onz));
1273 
1274   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
1275   rstart = dof * maij->A->rmap->rstart;
1276   cstart = dof * maij->A->cmap->rstart;
1277   garray = mpiaij->garray;
1278 
1279   ii = rstart;
1280   for (i = 0; i < A->rmap->n / dof; i++) {
1281     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
1282     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
1283     for (j = 0; j < dof; j++) {
1284       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
1285       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
1286       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
1287       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
1288       ii++;
1289     }
1290     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
1291     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
1292   }
1293   PetscCall(PetscFree2(icols, oicols));
1294 
1295   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1296   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1297 
1298   if (reuse == MAT_INPLACE_MATRIX) {
1299     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
1300     ((PetscObject)A)->refct = 1;
1301 
1302     PetscCall(MatHeaderReplace(A, &B));
1303 
1304     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
1305   } else {
1306     *newmat = B;
1307   }
1308   PetscFunctionReturn(PETSC_SUCCESS);
1309 }
1310 
1311 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1312 {
1313   Mat A;
1314 
1315   PetscFunctionBegin;
1316   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1317   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1318   PetscCall(MatDestroy(&A));
1319   PetscFunctionReturn(PETSC_SUCCESS);
1320 }
1321 
1322 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1323 {
1324   Mat A;
1325 
1326   PetscFunctionBegin;
1327   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1328   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1329   PetscCall(MatDestroy(&A));
1330   PetscFunctionReturn(PETSC_SUCCESS);
1331 }
1332 
1333 /* ---------------------------------------------------------------------------------- */
1334 /*@
1335   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1336   operations for multicomponent problems.  It interpolates each component the same
1337   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1338   and `MATMPIAIJ` for distributed matrices.
1339 
1340   Collective
1341 
1342   Input Parameters:
1343 + A - the `MATAIJ` matrix describing the action on blocks
1344 - dof - the block size (number of components per node)
1345 
1346   Output Parameter:
1347 . maij - the new `MATMAIJ` matrix
1348 
1349   Operations provided:
1350 .vb
1351     MatMult()
1352     MatMultTranspose()
1353     MatMultAdd()
1354     MatMultTransposeAdd()
1355     MatView()
1356 .ve
1357 
1358   Level: advanced
1359 
1360 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
1361 @*/
1362 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1363 {
1364   PetscInt  n;
1365   Mat       B;
1366   PetscBool flg;
1367 #if defined(PETSC_HAVE_CUDA)
1368   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1369   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1370 #endif
1371 
1372   PetscFunctionBegin;
1373   dof = PetscAbs(dof);
1374   PetscCall(PetscObjectReference((PetscObject)A));
1375 
1376   if (dof == 1) *maij = A;
1377   else {
1378     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1379     /* propagate vec type */
1380     PetscCall(MatSetVecType(B, A->defaultvectype));
1381     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1382     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1383     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1384     PetscCall(PetscLayoutSetUp(B->rmap));
1385     PetscCall(PetscLayoutSetUp(B->cmap));
1386 
1387     B->assembled = PETSC_TRUE;
1388 
1389     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1390     if (flg) {
1391       Mat_SeqMAIJ *b;
1392 
1393       PetscCall(MatSetType(B, MATSEQMAIJ));
1394 
1395       B->ops->setup   = NULL;
1396       B->ops->destroy = MatDestroy_SeqMAIJ;
1397       B->ops->view    = MatView_SeqMAIJ;
1398 
1399       b      = (Mat_SeqMAIJ *)B->data;
1400       b->dof = dof;
1401       b->AIJ = A;
1402 
1403       if (dof == 2) {
1404         B->ops->mult             = MatMult_SeqMAIJ_2;
1405         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1406         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1407         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1408       } else if (dof == 3) {
1409         B->ops->mult             = MatMult_SeqMAIJ_3;
1410         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1411         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1412         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1413       } else if (dof == 4) {
1414         B->ops->mult             = MatMult_SeqMAIJ_4;
1415         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1416         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1417         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1418       } else if (dof == 5) {
1419         B->ops->mult             = MatMult_SeqMAIJ_5;
1420         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1421         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1422         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1423       } else if (dof == 6) {
1424         B->ops->mult             = MatMult_SeqMAIJ_6;
1425         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1426         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1427         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1428       } else if (dof == 7) {
1429         B->ops->mult             = MatMult_SeqMAIJ_7;
1430         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
1431         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
1432         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1433       } else if (dof == 8) {
1434         B->ops->mult             = MatMult_SeqMAIJ_8;
1435         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1436         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1437         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1438       } else if (dof == 9) {
1439         B->ops->mult             = MatMult_SeqMAIJ_9;
1440         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1441         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1442         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1443       } else if (dof == 10) {
1444         B->ops->mult             = MatMult_SeqMAIJ_10;
1445         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
1446         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
1447         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1448       } else if (dof == 11) {
1449         B->ops->mult             = MatMult_SeqMAIJ_11;
1450         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
1451         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
1452         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1453       } else if (dof == 16) {
1454         B->ops->mult             = MatMult_SeqMAIJ_16;
1455         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1456         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1457         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1458       } else if (dof == 18) {
1459         B->ops->mult             = MatMult_SeqMAIJ_18;
1460         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
1461         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
1462         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1463       } else {
1464         B->ops->mult             = MatMult_SeqMAIJ_N;
1465         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
1466         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
1467         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1468       }
1469 #if defined(PETSC_HAVE_CUDA)
1470       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1471 #endif
1472       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1473       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1474     } else {
1475       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
1476       Mat_MPIMAIJ *b;
1477       IS           from, to;
1478       Vec          gvec;
1479 
1480       PetscCall(MatSetType(B, MATMPIMAIJ));
1481 
1482       B->ops->setup   = NULL;
1483       B->ops->destroy = MatDestroy_MPIMAIJ;
1484       B->ops->view    = MatView_MPIMAIJ;
1485 
1486       b      = (Mat_MPIMAIJ *)B->data;
1487       b->dof = dof;
1488       b->A   = A;
1489 
1490       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1491       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
1492 
1493       PetscCall(VecGetSize(mpiaij->lvec, &n));
1494       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1495       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1496       PetscCall(VecSetBlockSize(b->w, dof));
1497       PetscCall(VecSetType(b->w, VECSEQ));
1498 
1499       /* create two temporary Index sets for build scatter gather */
1500       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1501       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1502 
1503       /* create temporary global vector to generate scatter context */
1504       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1505 
1506       /* generate the scatter context */
1507       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1508 
1509       PetscCall(ISDestroy(&from));
1510       PetscCall(ISDestroy(&to));
1511       PetscCall(VecDestroy(&gvec));
1512 
1513       B->ops->mult             = MatMult_MPIMAIJ_dof;
1514       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1515       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1516       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1517 
1518 #if defined(PETSC_HAVE_CUDA)
1519       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1520 #endif
1521       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1522       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1523     }
1524     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
1525     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1526     PetscCall(MatSetUp(B));
1527 #if defined(PETSC_HAVE_CUDA)
1528     /* temporary until we have CUDA implementation of MAIJ */
1529     {
1530       PetscBool flg;
1531       if (convert) {
1532         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1533         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1534       }
1535     }
1536 #endif
1537     *maij = B;
1538   }
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541