xref: /petsc/src/mat/impls/maij/maij.c (revision bb5b24f58d6a1ffb5e6df85fcf5defc57450a95a)
1 
2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/utils/freespace.h>
4 
5 /*@
6    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7 
8    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9 
10    Input Parameter:
11 .  A - the `MATMAIJ` matrix
12 
13    Output Parameter:
14 .  B - the `MATAIJ` matrix
15 
16    Level: advanced
17 
18    Note:
19     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20 
21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22 @*/
23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24 {
25   PetscBool ismpimaij, isseqmaij;
26 
27   PetscFunctionBegin;
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
29   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
30   if (ismpimaij) {
31     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32 
33     *B = b->A;
34   } else if (isseqmaij) {
35     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36 
37     *B = b->AIJ;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 /*@
45    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46 
47    Logically Collective
48 
49    Input Parameters:
50 +  A - the `MATMAIJ` matrix
51 -  dof - the block size for the new matrix
52 
53    Output Parameter:
54 .  B - the new `MATMAIJ` matrix
55 
56    Level: advanced
57 
58 .seealso: `MATMAIJ`, `MatCreateMAIJ()`
59 @*/
60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
61 {
62   Mat Aij = NULL;
63 
64   PetscFunctionBegin;
65   PetscValidLogicalCollectiveInt(A, dof, 2);
66   PetscCall(MatMAIJGetAIJ(A, &Aij));
67   PetscCall(MatCreateMAIJ(Aij, dof, B));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
72 {
73   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74 
75   PetscFunctionBegin;
76   PetscCall(MatDestroy(&b->AIJ));
77   PetscCall(PetscFree(A->data));
78   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatSetUp_MAIJ(Mat A)
85 {
86   PetscFunctionBegin;
87   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
88 }
89 
90 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
91 {
92   Mat B;
93 
94   PetscFunctionBegin;
95   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
96   PetscCall(MatView(B, viewer));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
107   PetscCall(MatView(B, viewer));
108   PetscCall(MatDestroy(&B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113 {
114   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatDestroy(&b->AIJ));
118   PetscCall(MatDestroy(&b->OAIJ));
119   PetscCall(MatDestroy(&b->A));
120   PetscCall(VecScatterDestroy(&b->ctx));
121   PetscCall(VecDestroy(&b->w));
122   PetscCall(PetscFree(A->data));
123   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
124   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
125   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
126   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 /*MC
131   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
132   multicomponent problems, interpolating or restricting each component the same way independently.
133   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134 
135   Operations provided:
136 .vb
137     MatMult()
138     MatMultTranspose()
139     MatMultAdd()
140     MatMultTransposeAdd()
141 .ve
142 
143   Level: advanced
144 
145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
146 M*/
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149 {
150   Mat_MPIMAIJ *b;
151   PetscMPIInt  size;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscNew(&b));
155   A->data = (void *)b;
156 
157   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158 
159   A->ops->setup = MatSetUp_MAIJ;
160 
161   b->AIJ  = NULL;
162   b->dof  = 0;
163   b->OAIJ = NULL;
164   b->ctx  = NULL;
165   b->w    = NULL;
166   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
167   if (size == 1) {
168     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
169   } else {
170     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
171   }
172   A->preallocated = PETSC_TRUE;
173   A->assembled    = PETSC_TRUE;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 #if PetscHasAttribute(always_inline)
178   #define PETSC_FORCE_INLINE __attribute__((always_inline))
179 #else
180   #define PETSC_FORCE_INLINE
181 #endif
182 
183 #if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__ICC)
184   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
185 #else
186   #define PETSC_PRAGMA_UNROLL
187 #endif
188 
189 enum {
190   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
191 };
192 
193 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
194 // keyword into account for these...
195 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
196 {
197   const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
198   const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
199   const Mat          baij       = b->AIJ;
200   const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
201   const PetscInt     m          = baij->rmap->n;
202   const PetscInt     nz         = a->nz;
203   const PetscInt    *idx        = a->j;
204   const PetscInt    *ii         = a->i;
205   const PetscScalar *v          = a->a;
206   PetscInt           nonzerorow = 0;
207   const PetscScalar *x;
208   PetscScalar       *z;
209 
210   PetscFunctionBegin;
211   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
212   if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
213   PetscCall(VecGetArrayRead(xx, &x));
214   if (mult_add) {
215     PetscCall(VecGetArray(zz, &z));
216   } else {
217     PetscCall(VecGetArrayWrite(zz, &z));
218   }
219 
220   for (PetscInt i = 0; i < m; ++i) {
221     PetscInt       jrow = ii[i];
222     const PetscInt n    = ii[i + 1] - jrow;
223     // leave a line so clang-format does not align these decls
224     PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
225 
226     nonzerorow += n > 0;
227     for (PetscInt j = 0; j < n; ++j, ++jrow) {
228       const PetscScalar v_jrow     = v[jrow];
229       const PetscInt    N_idx_jrow = N * idx[jrow];
230 
231       PETSC_PRAGMA_UNROLL
232       for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
233     }
234 
235     PETSC_PRAGMA_UNROLL
236     for (int k = 0; k < N; ++k) {
237       const PetscInt z_idx = N * i + k;
238 
239       if (mult_add) {
240         z[z_idx] += sum[k];
241       } else {
242         z[z_idx] = sum[k];
243       }
244     }
245   }
246   PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
247   PetscCall(VecRestoreArrayRead(xx, &x));
248   if (mult_add) {
249     PetscCall(VecRestoreArray(zz, &z));
250   } else {
251     PetscCall(VecRestoreArrayWrite(zz, &z));
252   }
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
257 {
258   const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
259   const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
260   const Mat          baij     = b->AIJ;
261   const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
262   const PetscInt     m        = baij->rmap->n;
263   const PetscInt     nz       = a->nz;
264   const PetscInt    *a_j      = a->j;
265   const PetscInt    *a_i      = a->i;
266   const PetscScalar *a_a      = a->a;
267   const PetscScalar *x;
268   PetscScalar       *z;
269 
270   PetscFunctionBegin;
271   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
272   if (mult_add) {
273     if (yy != zz) PetscCall(VecCopy(yy, zz));
274   } else {
275     PetscCall(VecSet(zz, 0.0));
276   }
277   PetscCall(VecGetArrayRead(xx, &x));
278   PetscCall(VecGetArray(zz, &z));
279 
280   for (PetscInt i = 0; i < m; i++) {
281     const PetscInt     a_ii = a_i[i];
282     const PetscInt    *idx  = a_j + a_ii;
283     const PetscScalar *v    = a_a + a_ii;
284     const PetscInt     n    = a_i[i + 1] - a_ii;
285     PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
286 
287     PETSC_PRAGMA_UNROLL
288     for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
289     for (PetscInt j = 0; j < n; ++j) {
290       const PetscInt    N_idx_j = N * idx[j];
291       const PetscScalar v_j     = v[j];
292 
293       PETSC_PRAGMA_UNROLL
294       for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
295     }
296   }
297 
298   PetscCall(PetscLogFlops(2 * N * nz));
299   PetscCall(VecRestoreArrayRead(xx, &x));
300   PetscCall(VecRestoreArray(zz, &z));
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 /* -------------------------------------------------------------------------------------- */
305 
306 static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
307 {
308   PetscFunctionBegin;
309   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2));
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
313 static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
314 {
315   PetscFunctionBegin;
316   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2));
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
321 {
322   PetscFunctionBegin;
323   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2));
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
328 {
329   PetscFunctionBegin;
330   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2));
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
334 /* -------------------------------------------------------------------------------------- */
335 
336 static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
337 {
338   PetscFunctionBegin;
339   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3));
340   PetscFunctionReturn(PETSC_SUCCESS);
341 }
342 
343 static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
344 {
345   PetscFunctionBegin;
346   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3));
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
351 {
352   PetscFunctionBegin;
353   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
358 {
359   PetscFunctionBegin;
360   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 3));
361   PetscFunctionReturn(PETSC_SUCCESS);
362 }
363 
364 /* ------------------------------------------------------------------------------ */
365 
366 static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
367 {
368   PetscFunctionBegin;
369   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
374 {
375   PetscFunctionBegin;
376   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
381 {
382   PetscFunctionBegin;
383   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
388 {
389   PetscFunctionBegin;
390   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 /* ------------------------------------------------------------------------------ */
395 
396 static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
397 {
398   PetscFunctionBegin;
399   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
404 {
405   PetscFunctionBegin;
406   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
410 static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
411 {
412   PetscFunctionBegin;
413   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
418 {
419   PetscFunctionBegin;
420   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
421   PetscFunctionReturn(PETSC_SUCCESS);
422 }
423 
424 /* ------------------------------------------------------------------------------ */
425 
426 static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
427 {
428   PetscFunctionBegin;
429   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
434 {
435   PetscFunctionBegin;
436   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
440 static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
441 {
442   PetscFunctionBegin;
443   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
448 {
449   PetscFunctionBegin;
450   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
451   PetscFunctionReturn(PETSC_SUCCESS);
452 }
453 
454 /* ------------------------------------------------------------------------------ */
455 
456 static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
457 {
458   PetscFunctionBegin;
459   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
460   PetscFunctionReturn(PETSC_SUCCESS);
461 }
462 
463 static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
464 {
465   PetscFunctionBegin;
466   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
467   PetscFunctionReturn(PETSC_SUCCESS);
468 }
469 
470 static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
471 {
472   PetscFunctionBegin;
473   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
478 {
479   PetscFunctionBegin;
480   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
481   PetscFunctionReturn(PETSC_SUCCESS);
482 }
483 
484 /* ------------------------------------------------------------------------------ */
485 
486 static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
487 {
488   PetscFunctionBegin;
489   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
490   PetscFunctionReturn(PETSC_SUCCESS);
491 }
492 
493 static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
494 {
495   PetscFunctionBegin;
496   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
497   PetscFunctionReturn(PETSC_SUCCESS);
498 }
499 
500 static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
501 {
502   PetscFunctionBegin;
503   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
508 {
509   PetscFunctionBegin;
510   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
511   PetscFunctionReturn(PETSC_SUCCESS);
512 }
513 
514 /* ------------------------------------------------------------------------------ */
515 
516 static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
517 {
518   PetscFunctionBegin;
519   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
523 static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
524 {
525   PetscFunctionBegin;
526   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   PetscFunctionBegin;
533   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
538 {
539   PetscFunctionBegin;
540   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
541   PetscFunctionReturn(PETSC_SUCCESS);
542 }
543 
544 /* ------------------------------------------------------------------------------ */
545 
546 static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
547 {
548   PetscFunctionBegin;
549   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
550   PetscFunctionReturn(PETSC_SUCCESS);
551 }
552 
553 static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
554 {
555   PetscFunctionBegin;
556   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
561 {
562   PetscFunctionBegin;
563   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
564   PetscFunctionReturn(PETSC_SUCCESS);
565 }
566 
567 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
568 {
569   PetscFunctionBegin;
570   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
571   PetscFunctionReturn(PETSC_SUCCESS);
572 }
573 
574 /*-------------------------------------------------------------------------------------------- */
575 
576 static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
577 {
578   PetscFunctionBegin;
579   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
580   PetscFunctionReturn(PETSC_SUCCESS);
581 }
582 
583 static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
584 {
585   PetscFunctionBegin;
586   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
587   PetscFunctionReturn(PETSC_SUCCESS);
588 }
589 
590 static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
591 {
592   PetscFunctionBegin;
593   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
594   PetscFunctionReturn(PETSC_SUCCESS);
595 }
596 
597 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
598 {
599   PetscFunctionBegin;
600   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
601   PetscFunctionReturn(PETSC_SUCCESS);
602 }
603 
604 /*-------------------------------------------------------------------------------------------- */
605 
606 static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
607 {
608   PetscFunctionBegin;
609   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
614 {
615   PetscFunctionBegin;
616   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
621 {
622   PetscFunctionBegin;
623   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
631   PetscFunctionReturn(PETSC_SUCCESS);
632 }
633 
634 /*-------------------------------------------------------------------------------------------- */
635 
636 static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
637 {
638   PetscFunctionBegin;
639   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
640   PetscFunctionReturn(PETSC_SUCCESS);
641 }
642 
643 static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
644 {
645   PetscFunctionBegin;
646   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
651 {
652   PetscFunctionBegin;
653   PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
654   PetscFunctionReturn(PETSC_SUCCESS);
655 }
656 
657 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
658 {
659   PetscFunctionBegin;
660   PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*-------------------------------------------------------------------------------------------- */
665 
666 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
667 {
668   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
669   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
670   const PetscScalar *x, *v;
671   PetscScalar       *y, *sums;
672   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
673   PetscInt           n, i, jrow, j, dof = b->dof, k;
674 
675   PetscFunctionBegin;
676   PetscCall(VecGetArrayRead(xx, &x));
677   PetscCall(VecSet(yy, 0.0));
678   PetscCall(VecGetArray(yy, &y));
679   idx = a->j;
680   v   = a->a;
681   ii  = a->i;
682 
683   for (i = 0; i < m; i++) {
684     jrow = ii[i];
685     n    = ii[i + 1] - jrow;
686     sums = y + dof * i;
687     for (j = 0; j < n; j++) {
688       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
689       jrow++;
690     }
691   }
692 
693   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
694   PetscCall(VecRestoreArrayRead(xx, &x));
695   PetscCall(VecRestoreArray(yy, &y));
696   PetscFunctionReturn(PETSC_SUCCESS);
697 }
698 
699 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
700 {
701   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
702   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
703   const PetscScalar *x, *v;
704   PetscScalar       *y, *sums;
705   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
706   PetscInt           n, i, jrow, j, dof = b->dof, k;
707 
708   PetscFunctionBegin;
709   if (yy != zz) PetscCall(VecCopy(yy, zz));
710   PetscCall(VecGetArrayRead(xx, &x));
711   PetscCall(VecGetArray(zz, &y));
712   idx = a->j;
713   v   = a->a;
714   ii  = a->i;
715 
716   for (i = 0; i < m; i++) {
717     jrow = ii[i];
718     n    = ii[i + 1] - jrow;
719     sums = y + dof * i;
720     for (j = 0; j < n; j++) {
721       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
722       jrow++;
723     }
724   }
725 
726   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
727   PetscCall(VecRestoreArrayRead(xx, &x));
728   PetscCall(VecRestoreArray(zz, &y));
729   PetscFunctionReturn(PETSC_SUCCESS);
730 }
731 
732 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
733 {
734   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
735   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
736   const PetscScalar *x, *v, *alpha;
737   PetscScalar       *y;
738   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
739   PetscInt           n, i, k;
740 
741   PetscFunctionBegin;
742   PetscCall(VecGetArrayRead(xx, &x));
743   PetscCall(VecSet(yy, 0.0));
744   PetscCall(VecGetArray(yy, &y));
745   for (i = 0; i < m; i++) {
746     idx   = a->j + a->i[i];
747     v     = a->a + a->i[i];
748     n     = a->i[i + 1] - a->i[i];
749     alpha = x + dof * i;
750     while (n-- > 0) {
751       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
752       idx++;
753       v++;
754     }
755   }
756   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
757   PetscCall(VecRestoreArrayRead(xx, &x));
758   PetscCall(VecRestoreArray(yy, &y));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
763 {
764   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
765   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
766   const PetscScalar *x, *v, *alpha;
767   PetscScalar       *y;
768   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
769   PetscInt           n, i, k;
770 
771   PetscFunctionBegin;
772   if (yy != zz) PetscCall(VecCopy(yy, zz));
773   PetscCall(VecGetArrayRead(xx, &x));
774   PetscCall(VecGetArray(zz, &y));
775   for (i = 0; i < m; i++) {
776     idx   = a->j + a->i[i];
777     v     = a->a + a->i[i];
778     n     = a->i[i + 1] - a->i[i];
779     alpha = x + dof * i;
780     while (n-- > 0) {
781       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
782       idx++;
783       v++;
784     }
785   }
786   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
787   PetscCall(VecRestoreArrayRead(xx, &x));
788   PetscCall(VecRestoreArray(zz, &y));
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 /*-------------------------------------------------------------------------------------------- */
793 
794 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
795 {
796   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
797 
798   PetscFunctionBegin;
799   /* start the scatter */
800   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
801   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
802   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
803   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
808 {
809   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
810 
811   PetscFunctionBegin;
812   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
813   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
814   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
815   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
816   PetscFunctionReturn(PETSC_SUCCESS);
817 }
818 
819 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
820 {
821   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
822 
823   PetscFunctionBegin;
824   /* start the scatter */
825   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
826   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
827   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
828   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
829   PetscFunctionReturn(PETSC_SUCCESS);
830 }
831 
832 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
833 {
834   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
835 
836   PetscFunctionBegin;
837   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
838   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
839   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
840   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 /* ---------------------------------------------------------------- */
845 
846 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
847 {
848   Mat_Product *product = C->product;
849 
850   PetscFunctionBegin;
851   if (product->type == MATPRODUCT_PtAP) {
852     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
853   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
858 {
859   Mat_Product *product = C->product;
860   PetscBool    flg     = PETSC_FALSE;
861   Mat          A = product->A, P = product->B;
862   PetscInt     alg = 1; /* set default algorithm */
863 #if !defined(PETSC_HAVE_HYPRE)
864   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
865   PetscInt    nalg        = 4;
866 #else
867   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
868   PetscInt    nalg        = 5;
869 #endif
870 
871   PetscFunctionBegin;
872   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
873 
874   /* PtAP */
875   /* Check matrix local sizes */
876   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
877              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
878   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
879              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
880 
881   /* Set the default algorithm */
882   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
883   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
884 
885   /* Get runtime option */
886   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
887   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
888   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
889   PetscOptionsEnd();
890 
891   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
892   if (flg) {
893     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
894     PetscFunctionReturn(PETSC_SUCCESS);
895   }
896 
897   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
898   if (flg) {
899     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
900     PetscFunctionReturn(PETSC_SUCCESS);
901   }
902 
903   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
904   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
905   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
906   PetscCall(MatProductSetFromOptions(C));
907   PetscFunctionReturn(PETSC_SUCCESS);
908 }
909 
910 /* ---------------------------------------------------------------- */
911 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
912 {
913   /* This routine requires testing -- first draft only */
914   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
915   Mat              P  = pp->AIJ;
916   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
917   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
918   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
919   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
920   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
921   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
922   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
923   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
924   MatScalar       *ca = c->a, *caj, *apa;
925 
926   PetscFunctionBegin;
927   /* Allocate temporary array for storage of one row of A*P */
928   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
929 
930   /* Clear old values in C */
931   PetscCall(PetscArrayzero(ca, ci[cm]));
932 
933   for (i = 0; i < am; i++) {
934     /* Form sparse row of A*P */
935     anzi  = ai[i + 1] - ai[i];
936     apnzj = 0;
937     for (j = 0; j < anzi; j++) {
938       /* Get offset within block of P */
939       pshift = *aj % ppdof;
940       /* Get block row of P */
941       prow = *aj++ / ppdof; /* integer division */
942       pnzj = pi[prow + 1] - pi[prow];
943       pjj  = pj + pi[prow];
944       paj  = pa + pi[prow];
945       for (k = 0; k < pnzj; k++) {
946         poffset = pjj[k] * ppdof + pshift;
947         if (!apjdense[poffset]) {
948           apjdense[poffset] = -1;
949           apj[apnzj++]      = poffset;
950         }
951         apa[poffset] += (*aa) * paj[k];
952       }
953       PetscCall(PetscLogFlops(2.0 * pnzj));
954       aa++;
955     }
956 
957     /* Sort the j index array for quick sparse axpy. */
958     /* Note: a array does not need sorting as it is in dense storage locations. */
959     PetscCall(PetscSortInt(apnzj, apj));
960 
961     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
962     prow    = i / ppdof; /* integer division */
963     pshift  = i % ppdof;
964     poffset = pi[prow];
965     pnzi    = pi[prow + 1] - poffset;
966     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
967     pJ = pj + poffset;
968     pA = pa + poffset;
969     for (j = 0; j < pnzi; j++) {
970       crow = (*pJ) * ppdof + pshift;
971       cjj  = cj + ci[crow];
972       caj  = ca + ci[crow];
973       pJ++;
974       /* Perform sparse axpy operation.  Note cjj includes apj. */
975       for (k = 0, nextap = 0; nextap < apnzj; k++) {
976         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
977       }
978       PetscCall(PetscLogFlops(2.0 * apnzj));
979       pA++;
980     }
981 
982     /* Zero the current row info for A*P */
983     for (j = 0; j < apnzj; j++) {
984       apa[apj[j]]      = 0.;
985       apjdense[apj[j]] = 0;
986     }
987   }
988 
989   /* Assemble the final matrix and clean up */
990   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
991   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
992   PetscCall(PetscFree3(apa, apj, apjdense));
993   PetscFunctionReturn(PETSC_SUCCESS);
994 }
995 
996 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
997 {
998   PetscFreeSpaceList free_space = NULL, current_space = NULL;
999   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
1000   Mat                P  = pp->AIJ;
1001   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
1002   PetscInt          *pti, *ptj, *ptJ;
1003   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
1004   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
1005   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
1006   MatScalar         *ca;
1007   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
1008 
1009   PetscFunctionBegin;
1010   /* Get ij structure of P^T */
1011   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
1012 
1013   cn = pn * ppdof;
1014   /* Allocate ci array, arrays for fill computation and */
1015   /* free space for accumulating nonzero column info */
1016   PetscCall(PetscMalloc1(cn + 1, &ci));
1017   ci[0] = 0;
1018 
1019   /* Work arrays for rows of P^T*A */
1020   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
1021   PetscCall(PetscArrayzero(ptadenserow, an));
1022   PetscCall(PetscArrayzero(denserow, cn));
1023 
1024   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1025   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1026   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
1027   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
1028   current_space = free_space;
1029 
1030   /* Determine symbolic info for each row of C: */
1031   for (i = 0; i < pn; i++) {
1032     ptnzi = pti[i + 1] - pti[i];
1033     ptJ   = ptj + pti[i];
1034     for (dof = 0; dof < ppdof; dof++) {
1035       ptanzi = 0;
1036       /* Determine symbolic row of PtA: */
1037       for (j = 0; j < ptnzi; j++) {
1038         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
1039         arow = ptJ[j] * ppdof + dof;
1040         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
1041         anzj = ai[arow + 1] - ai[arow];
1042         ajj  = aj + ai[arow];
1043         for (k = 0; k < anzj; k++) {
1044           if (!ptadenserow[ajj[k]]) {
1045             ptadenserow[ajj[k]]    = -1;
1046             ptasparserow[ptanzi++] = ajj[k];
1047           }
1048         }
1049       }
1050       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1051       ptaj = ptasparserow;
1052       cnzi = 0;
1053       for (j = 0; j < ptanzi; j++) {
1054         /* Get offset within block of P */
1055         pshift = *ptaj % ppdof;
1056         /* Get block row of P */
1057         prow = (*ptaj++) / ppdof; /* integer division */
1058         /* P has same number of nonzeros per row as the compressed form */
1059         pnzj = pi[prow + 1] - pi[prow];
1060         pjj  = pj + pi[prow];
1061         for (k = 0; k < pnzj; k++) {
1062           /* Locations in C are shifted by the offset within the block */
1063           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
1064           if (!denserow[pjj[k] * ppdof + pshift]) {
1065             denserow[pjj[k] * ppdof + pshift] = -1;
1066             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
1067           }
1068         }
1069       }
1070 
1071       /* sort sparserow */
1072       PetscCall(PetscSortInt(cnzi, sparserow));
1073 
1074       /* If free space is not available, make more free space */
1075       /* Double the amount of total space in the list */
1076       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
1077 
1078       /* Copy data into free space, and zero out denserows */
1079       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
1080 
1081       current_space->array += cnzi;
1082       current_space->local_used += cnzi;
1083       current_space->local_remaining -= cnzi;
1084 
1085       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
1086       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
1087 
1088       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1089       /*        For now, we will recompute what is needed. */
1090       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
1091     }
1092   }
1093   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1094   /* Allocate space for cj, initialize cj, and */
1095   /* destroy list of free space and other temporary array(s) */
1096   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
1097   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
1098   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
1099 
1100   /* Allocate space for ca */
1101   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
1102 
1103   /* put together the new matrix */
1104   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
1105   PetscCall(MatSetBlockSize(C, pp->dof));
1106 
1107   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1108   /* Since these are PETSc arrays, change flags to free them as necessary. */
1109   c          = (Mat_SeqAIJ *)(C->data);
1110   c->free_a  = PETSC_TRUE;
1111   c->free_ij = PETSC_TRUE;
1112   c->nonew   = 0;
1113 
1114   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
1115   C->ops->productnumeric = MatProductNumeric_PtAP;
1116 
1117   /* Clean up. */
1118   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
1119   PetscFunctionReturn(PETSC_SUCCESS);
1120 }
1121 
1122 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
1123 {
1124   Mat_Product *product = C->product;
1125   Mat          A = product->A, P = product->B;
1126 
1127   PetscFunctionBegin;
1128   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
1132 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
1133 
1134 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
1135 {
1136   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1137 
1138   PetscFunctionBegin;
1139 
1140   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
1143 
1144 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
1145 
1146 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
1147 {
1148   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1149 
1150   PetscFunctionBegin;
1151   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
1152   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
1153   PetscFunctionReturn(PETSC_SUCCESS);
1154 }
1155 
1156 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
1157 
1158 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
1159 {
1160   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1161 
1162   PetscFunctionBegin;
1163 
1164   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
1169 
1170 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
1171 {
1172   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
1173 
1174   PetscFunctionBegin;
1175 
1176   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
1177   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
1178   PetscFunctionReturn(PETSC_SUCCESS);
1179 }
1180 
1181 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
1182 {
1183   Mat_Product *product = C->product;
1184   Mat          A = product->A, P = product->B;
1185   PetscBool    flg;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
1189   if (flg) {
1190     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
1191     C->ops->productnumeric = MatProductNumeric_PtAP;
1192     PetscFunctionReturn(PETSC_SUCCESS);
1193   }
1194 
1195   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
1196   if (flg) {
1197     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
1198     C->ops->productnumeric = MatProductNumeric_PtAP;
1199     PetscFunctionReturn(PETSC_SUCCESS);
1200   }
1201 
1202   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
1203 }
1204 
1205 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1206 {
1207   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
1208   Mat          a   = b->AIJ, B;
1209   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
1210   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
1211   PetscInt    *cols;
1212   PetscScalar *vals;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(MatGetSize(a, &m, &n));
1216   PetscCall(PetscMalloc1(dof * m, &ilen));
1217   for (i = 0; i < m; i++) {
1218     nmax = PetscMax(nmax, aij->ilen[i]);
1219     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
1220   }
1221   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
1222   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
1223   PetscCall(MatSetType(B, newtype));
1224   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
1225   PetscCall(PetscFree(ilen));
1226   PetscCall(PetscMalloc1(nmax, &icols));
1227   ii = 0;
1228   for (i = 0; i < m; i++) {
1229     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
1230     for (j = 0; j < dof; j++) {
1231       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
1232       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
1233       ii++;
1234     }
1235     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
1236   }
1237   PetscCall(PetscFree(icols));
1238   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1239   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1240 
1241   if (reuse == MAT_INPLACE_MATRIX) {
1242     PetscCall(MatHeaderReplace(A, &B));
1243   } else {
1244     *newmat = B;
1245   }
1246   PetscFunctionReturn(PETSC_SUCCESS);
1247 }
1248 
1249 #include <../src/mat/impls/aij/mpi/mpiaij.h>
1250 
1251 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1252 {
1253   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
1254   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
1255   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
1256   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
1257   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
1258   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
1259   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
1260   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
1261   PetscInt     rstart, cstart, *garray, ii, k;
1262   PetscScalar *vals, *ovals;
1263 
1264   PetscFunctionBegin;
1265   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
1266   for (i = 0; i < A->rmap->n / dof; i++) {
1267     nmax  = PetscMax(nmax, AIJ->ilen[i]);
1268     onmax = PetscMax(onmax, OAIJ->ilen[i]);
1269     for (j = 0; j < dof; j++) {
1270       dnz[dof * i + j] = AIJ->ilen[i];
1271       onz[dof * i + j] = OAIJ->ilen[i];
1272     }
1273   }
1274   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1275   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1276   PetscCall(MatSetType(B, newtype));
1277   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
1278   PetscCall(MatSetBlockSize(B, dof));
1279   PetscCall(PetscFree2(dnz, onz));
1280 
1281   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
1282   rstart = dof * maij->A->rmap->rstart;
1283   cstart = dof * maij->A->cmap->rstart;
1284   garray = mpiaij->garray;
1285 
1286   ii = rstart;
1287   for (i = 0; i < A->rmap->n / dof; i++) {
1288     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
1289     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
1290     for (j = 0; j < dof; j++) {
1291       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
1292       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
1293       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
1294       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
1295       ii++;
1296     }
1297     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
1298     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
1299   }
1300   PetscCall(PetscFree2(icols, oicols));
1301 
1302   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1303   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1304 
1305   if (reuse == MAT_INPLACE_MATRIX) {
1306     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
1307     ((PetscObject)A)->refct = 1;
1308 
1309     PetscCall(MatHeaderReplace(A, &B));
1310 
1311     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
1312   } else {
1313     *newmat = B;
1314   }
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1319 {
1320   Mat A;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1324   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1325   PetscCall(MatDestroy(&A));
1326   PetscFunctionReturn(PETSC_SUCCESS);
1327 }
1328 
1329 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1330 {
1331   Mat A;
1332 
1333   PetscFunctionBegin;
1334   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1335   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1336   PetscCall(MatDestroy(&A));
1337   PetscFunctionReturn(PETSC_SUCCESS);
1338 }
1339 
1340 /* ---------------------------------------------------------------------------------- */
1341 /*@
1342   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1343   operations for multicomponent problems.  It interpolates each component the same
1344   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1345   and `MATMPIAIJ` for distributed matrices.
1346 
1347   Collective
1348 
1349   Input Parameters:
1350 + A - the `MATAIJ` matrix describing the action on blocks
1351 - dof - the block size (number of components per node)
1352 
1353   Output Parameter:
1354 . maij - the new `MATMAIJ` matrix
1355 
1356   Operations provided:
1357 .vb
1358     MatMult()
1359     MatMultTranspose()
1360     MatMultAdd()
1361     MatMultTransposeAdd()
1362     MatView()
1363 .ve
1364 
1365   Level: advanced
1366 
1367 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
1368 @*/
1369 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1370 {
1371   PetscInt  n;
1372   Mat       B;
1373   PetscBool flg;
1374 #if defined(PETSC_HAVE_CUDA)
1375   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1376   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1377 #endif
1378 
1379   PetscFunctionBegin;
1380   dof = PetscAbs(dof);
1381   PetscCall(PetscObjectReference((PetscObject)A));
1382 
1383   if (dof == 1) *maij = A;
1384   else {
1385     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1386     /* propagate vec type */
1387     PetscCall(MatSetVecType(B, A->defaultvectype));
1388     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1389     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1390     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1391     PetscCall(PetscLayoutSetUp(B->rmap));
1392     PetscCall(PetscLayoutSetUp(B->cmap));
1393 
1394     B->assembled = PETSC_TRUE;
1395 
1396     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1397     if (flg) {
1398       Mat_SeqMAIJ *b;
1399 
1400       PetscCall(MatSetType(B, MATSEQMAIJ));
1401 
1402       B->ops->setup   = NULL;
1403       B->ops->destroy = MatDestroy_SeqMAIJ;
1404       B->ops->view    = MatView_SeqMAIJ;
1405 
1406       b      = (Mat_SeqMAIJ *)B->data;
1407       b->dof = dof;
1408       b->AIJ = A;
1409 
1410       if (dof == 2) {
1411         B->ops->mult             = MatMult_SeqMAIJ_2;
1412         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1413         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1414         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1415       } else if (dof == 3) {
1416         B->ops->mult             = MatMult_SeqMAIJ_3;
1417         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1418         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1419         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1420       } else if (dof == 4) {
1421         B->ops->mult             = MatMult_SeqMAIJ_4;
1422         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1423         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1424         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1425       } else if (dof == 5) {
1426         B->ops->mult             = MatMult_SeqMAIJ_5;
1427         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1428         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1429         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1430       } else if (dof == 6) {
1431         B->ops->mult             = MatMult_SeqMAIJ_6;
1432         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1433         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1434         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1435       } else if (dof == 7) {
1436         B->ops->mult             = MatMult_SeqMAIJ_7;
1437         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
1438         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
1439         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1440       } else if (dof == 8) {
1441         B->ops->mult             = MatMult_SeqMAIJ_8;
1442         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1443         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1444         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1445       } else if (dof == 9) {
1446         B->ops->mult             = MatMult_SeqMAIJ_9;
1447         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1448         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1449         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1450       } else if (dof == 10) {
1451         B->ops->mult             = MatMult_SeqMAIJ_10;
1452         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
1453         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
1454         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1455       } else if (dof == 11) {
1456         B->ops->mult             = MatMult_SeqMAIJ_11;
1457         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
1458         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
1459         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1460       } else if (dof == 16) {
1461         B->ops->mult             = MatMult_SeqMAIJ_16;
1462         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1463         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1464         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1465       } else if (dof == 18) {
1466         B->ops->mult             = MatMult_SeqMAIJ_18;
1467         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
1468         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
1469         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1470       } else {
1471         B->ops->mult             = MatMult_SeqMAIJ_N;
1472         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
1473         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
1474         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1475       }
1476 #if defined(PETSC_HAVE_CUDA)
1477       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1478 #endif
1479       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1480       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1481     } else {
1482       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
1483       Mat_MPIMAIJ *b;
1484       IS           from, to;
1485       Vec          gvec;
1486 
1487       PetscCall(MatSetType(B, MATMPIMAIJ));
1488 
1489       B->ops->setup   = NULL;
1490       B->ops->destroy = MatDestroy_MPIMAIJ;
1491       B->ops->view    = MatView_MPIMAIJ;
1492 
1493       b      = (Mat_MPIMAIJ *)B->data;
1494       b->dof = dof;
1495       b->A   = A;
1496 
1497       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1498       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
1499 
1500       PetscCall(VecGetSize(mpiaij->lvec, &n));
1501       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1502       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1503       PetscCall(VecSetBlockSize(b->w, dof));
1504       PetscCall(VecSetType(b->w, VECSEQ));
1505 
1506       /* create two temporary Index sets for build scatter gather */
1507       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1508       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1509 
1510       /* create temporary global vector to generate scatter context */
1511       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1512 
1513       /* generate the scatter context */
1514       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1515 
1516       PetscCall(ISDestroy(&from));
1517       PetscCall(ISDestroy(&to));
1518       PetscCall(VecDestroy(&gvec));
1519 
1520       B->ops->mult             = MatMult_MPIMAIJ_dof;
1521       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1522       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1523       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1524 
1525 #if defined(PETSC_HAVE_CUDA)
1526       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1527 #endif
1528       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1529       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1530     }
1531     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
1532     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1533     PetscCall(MatSetUp(B));
1534 #if defined(PETSC_HAVE_CUDA)
1535     /* temporary until we have CUDA implementation of MAIJ */
1536     {
1537       PetscBool flg;
1538       if (convert) {
1539         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1540         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1541       }
1542     }
1543 #endif
1544     *maij = B;
1545   }
1546   PetscFunctionReturn(PETSC_SUCCESS);
1547 }
1548