xref: /petsc/src/mat/impls/maij/maij.c (revision ec34bba6ec188c32e12dca9a8cfb2fb88bebbee0)
1 
2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/utils/freespace.h>
4 
5 /*@
6    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7 
8    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9 
10    Input Parameter:
11 .  A - the `MATMAIJ` matrix
12 
13    Output Parameter:
14 .  B - the `MATAIJ` matrix
15 
16    Level: advanced
17 
18    Note:
19     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
20 
21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22 @*/
23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24 {
25   PetscBool ismpimaij, isseqmaij;
26 
27   PetscFunctionBegin;
28   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
29   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
30   if (ismpimaij) {
31     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32 
33     *B = b->A;
34   } else if (isseqmaij) {
35     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36 
37     *B = b->AIJ;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 /*@
45    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46 
47    Logically Collective
48 
49    Input Parameters:
50 +  A - the `MATMAIJ` matrix
51 -  dof - the block size for the new matrix
52 
53    Output Parameter:
54 .  B - the new `MATMAIJ` matrix
55 
56    Level: advanced
57 
58 .seealso: `MATMAIJ`, `MatCreateMAIJ()`
59 @*/
60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
61 {
62   Mat Aij = NULL;
63 
64   PetscFunctionBegin;
65   PetscValidLogicalCollectiveInt(A, dof, 2);
66   PetscCall(MatMAIJGetAIJ(A, &Aij));
67   PetscCall(MatCreateMAIJ(Aij, dof, B));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
72 {
73   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74 
75   PetscFunctionBegin;
76   PetscCall(MatDestroy(&b->AIJ));
77   PetscCall(PetscFree(A->data));
78   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
79   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
/* MAIJ matrices cannot be assembled directly; they must be built through
   MatCreateMAIJ(), so a plain MatSetUp() is always an error. */
static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}
89 
90 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
91 {
92   Mat B;
93 
94   PetscFunctionBegin;
95   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
96   PetscCall(MatView(B, viewer));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
107   PetscCall(MatView(B, viewer));
108   PetscCall(MatDestroy(&B));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113 {
114   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115 
116   PetscFunctionBegin;
117   PetscCall(MatDestroy(&b->AIJ));
118   PetscCall(MatDestroy(&b->OAIJ));
119   PetscCall(MatDestroy(&b->A));
120   PetscCall(VecScatterDestroy(&b->ctx));
121   PetscCall(VecDestroy(&b->w));
122   PetscCall(PetscFree(A->data));
123   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
124   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
125   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
126   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 /*MC
131   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
132   multicomponent problems, interpolating or restricting each component the same way independently.
133   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134 
135   Operations provided:
136 .vb
137     MatMult()
138     MatMultTranspose()
139     MatMultAdd()
140     MatMultTransposeAdd()
141 .ve
142 
143   Level: advanced
144 
145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
146 M*/
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149 {
150   Mat_MPIMAIJ *b;
151   PetscMPIInt  size;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscNew(&b));
155   A->data = (void *)b;
156 
157   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158 
159   A->ops->setup = MatSetUp_MAIJ;
160 
161   b->AIJ  = NULL;
162   b->dof  = 0;
163   b->OAIJ = NULL;
164   b->ctx  = NULL;
165   b->w    = NULL;
166   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
167   if (size == 1) {
168     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
169   } else {
170     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
171   }
172   A->preallocated = PETSC_TRUE;
173   A->assembled    = PETSC_TRUE;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 #if PetscHasAttribute(always_inline)
178   #define PETSC_FORCE_INLINE __attribute__((always_inline))
179 #else
180   #define PETSC_FORCE_INLINE
181 #endif
182 
183 #if defined(__clang__)
184   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
185 #else
186   #define PETSC_PRAGMA_UNROLL
187 #endif
188 
189 enum {
190   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
191 };
192 
193 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
194 // keyword into account for these...
/*
  Shared kernel for MatMult (yy == NULL) and MatMultAdd (yy != NULL) of a
  SeqMAIJ matrix: applies the underlying SeqAIJ matrix blockwise with block
  size N. N is a compile-time constant at each instantiation site so the
  innermost k-loops of length N can be fully unrolled.
*/
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
  const Mat          baij       = b->AIJ;
  const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m          = baij->rmap->n;
  const PetscInt     nz         = a->nz;
  const PetscInt    *idx        = a->j;
  const PetscInt    *ii         = a->i;
  const PetscScalar *v          = a->a;
  PetscInt           nonzerorow = 0; /* counts rows with at least one nonzero, for the flop log below */
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  /* for the add variant, seed zz with yy so we can accumulate into it */
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    /* write-only access: prior contents of zz are fully overwritten */
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    /* accumulate the N per-component dot products for block row i */
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add) {
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
255 
/*
  Shared kernel for MatMultTranspose (yy == NULL) and MatMultTransposeAdd
  (yy != NULL) of a SeqMAIJ matrix: applies the transpose of the underlying
  SeqAIJ matrix blockwise with compile-time block size N, scattering each
  input block into the output rows indexed by the column indices.
*/
PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  /* transpose products accumulate scattered contributions, so zz must start
     from yy (add variant) or from zero (plain variant) */
  if (mult_add) {
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    const PetscInt    *idx  = a_j + a_ii;
    const PetscScalar *v    = a_a + a_ii;
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    /* cache the N components of input block i */
    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
303 
304 /* -------------------------------------------------------------------------------------- */
305 
/* Instantiate the four multiply kernels (MatMult, MatMultTranspose,
   MatMultAdd, MatMultTransposeAdd) for a fixed block size N by wrapping the
   inlined templates above; passing yy == NULL selects the non-add variant
   inside the template. */
#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

/* Specialized kernels exist only for these block sizes; other sizes fall back
   to the runtime-dof *_SeqMAIJ_N routines below. */
// clang-format off
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
// clang-format on

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
348 
349 /*-------------------------------------------------------------------------------------------- */
350 
351 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
352 {
353   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
354   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
355   const PetscScalar *x, *v;
356   PetscScalar       *y, *sums;
357   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
358   PetscInt           n, i, jrow, j, dof = b->dof, k;
359 
360   PetscFunctionBegin;
361   PetscCall(VecGetArrayRead(xx, &x));
362   PetscCall(VecSet(yy, 0.0));
363   PetscCall(VecGetArray(yy, &y));
364   idx = a->j;
365   v   = a->a;
366   ii  = a->i;
367 
368   for (i = 0; i < m; i++) {
369     jrow = ii[i];
370     n    = ii[i + 1] - jrow;
371     sums = y + dof * i;
372     for (j = 0; j < n; j++) {
373       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
374       jrow++;
375     }
376   }
377 
378   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
379   PetscCall(VecRestoreArrayRead(xx, &x));
380   PetscCall(VecRestoreArray(yy, &y));
381   PetscFunctionReturn(PETSC_SUCCESS);
382 }
383 
384 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
385 {
386   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
387   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
388   const PetscScalar *x, *v;
389   PetscScalar       *y, *sums;
390   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
391   PetscInt           n, i, jrow, j, dof = b->dof, k;
392 
393   PetscFunctionBegin;
394   if (yy != zz) PetscCall(VecCopy(yy, zz));
395   PetscCall(VecGetArrayRead(xx, &x));
396   PetscCall(VecGetArray(zz, &y));
397   idx = a->j;
398   v   = a->a;
399   ii  = a->i;
400 
401   for (i = 0; i < m; i++) {
402     jrow = ii[i];
403     n    = ii[i + 1] - jrow;
404     sums = y + dof * i;
405     for (j = 0; j < n; j++) {
406       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
407       jrow++;
408     }
409   }
410 
411   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
412   PetscCall(VecRestoreArrayRead(xx, &x));
413   PetscCall(VecRestoreArray(zz, &y));
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
418 {
419   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
421   const PetscScalar *x, *v, *alpha;
422   PetscScalar       *y;
423   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
424   PetscInt           n, i, k;
425 
426   PetscFunctionBegin;
427   PetscCall(VecGetArrayRead(xx, &x));
428   PetscCall(VecSet(yy, 0.0));
429   PetscCall(VecGetArray(yy, &y));
430   for (i = 0; i < m; i++) {
431     idx   = a->j + a->i[i];
432     v     = a->a + a->i[i];
433     n     = a->i[i + 1] - a->i[i];
434     alpha = x + dof * i;
435     while (n-- > 0) {
436       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
437       idx++;
438       v++;
439     }
440   }
441   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
442   PetscCall(VecRestoreArrayRead(xx, &x));
443   PetscCall(VecRestoreArray(yy, &y));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
448 {
449   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
450   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
451   const PetscScalar *x, *v, *alpha;
452   PetscScalar       *y;
453   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
454   PetscInt           n, i, k;
455 
456   PetscFunctionBegin;
457   if (yy != zz) PetscCall(VecCopy(yy, zz));
458   PetscCall(VecGetArrayRead(xx, &x));
459   PetscCall(VecGetArray(zz, &y));
460   for (i = 0; i < m; i++) {
461     idx   = a->j + a->i[i];
462     v     = a->a + a->i[i];
463     n     = a->i[i + 1] - a->i[i];
464     alpha = x + dof * i;
465     while (n-- > 0) {
466       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
467       idx++;
468       v++;
469     }
470   }
471   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
472   PetscCall(VecRestoreArrayRead(xx, &x));
473   PetscCall(VecRestoreArray(zz, &y));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /*-------------------------------------------------------------------------------------------- */
478 
/* Parallel MatMult: overlap the scatter of off-process entries of xx into the
   work vector with the multiply by the local (diagonal-block) part. */
static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* local part while communication is in flight */
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-process (off-diagonal block) contribution */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
491 
/* Parallel MatMultTranspose: apply both local transposes, then reverse-scatter
   the off-diagonal contribution (held in the work vector) with addition. */
static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503 
/* Parallel MatMultAdd: same communication/computation overlap as
   MatMult_MPIMAIJ_dof, but accumulates into zz starting from yy. */
static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* local part while communication is in flight */
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-process (off-diagonal block) contribution */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
516 
/* Parallel MatMultTransposeAdd: apply both local transposes (the diagonal part
   accumulating yy into zz), then reverse-scatter-add the off-diagonal part. */
static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
529 /* ---------------------------------------------------------------- */
530 
531 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
532 {
533   Mat_Product *product = C->product;
534 
535   PetscFunctionBegin;
536   if (product->type == MATPRODUCT_PtAP) {
537     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
538   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
539   PetscFunctionReturn(PETSC_SUCCESS);
540 }
541 
/* Select the PtAP algorithm for an MPIAIJ A with MPIMAIJ P: validate local
   sizes, pick a default algorithm, let the user override it via
   -mat_product_algorithm, and either use an all-at-once MAIJ implementation
   or fall back to converting P to MPIAIJ. */
static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm ("nonscalable") */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes: A's rows and columns must both be distributed like P's rows */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm (only if the user left it at "default") */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
  PetscOptionsEnd();

  /* The all-at-once variants have MAIJ-aware implementations */
  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
594 
595 /* ---------------------------------------------------------------- */
/* Numeric phase of C = P^T * A * P where P is a SeqMAIJ matrix: for each row
   of A one sparse row of A*P is accumulated in a dense work array (expanding
   the compressed P by block size ppdof), then folded into C via the outer
   product with the matching row of P^T. The sparsity of C was fixed by the
   symbolic phase. */
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        /* expanded column index in C; apjdense marks which are occupied */
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}
680 
/* Symbolic phase of C = P^T * A * P where P is a SeqMAIJ matrix: builds the
   nonzero structure (ci, cj) of C row by row using the symbolic transpose of
   P's underlying AIJ matrix, expanding the compressed structure by the block
   size ppdof, and creates C with zeroed values for the numeric phase. */
static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  /* C is square of dimension pn * ppdof (expanded column space of P) */
  cn = pn * ppdof;
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /*        For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca (zeroed; numeric phase fills the values) */
  PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)(C->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
806 
807 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
808 {
809   Mat_Product *product = C->product;
810   Mat          A = product->A, P = product->B;
811 
812   PetscFunctionBegin;
813   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
814   PetscFunctionReturn(PETSC_SUCCESS);
815 }
816 
817 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
818 
819 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
820 {
821   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
822 
823   PetscFunctionBegin;
824 
825   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
826   PetscFunctionReturn(PETSC_SUCCESS);
827 }
828 
829 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
830 
831 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
832 {
833   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
834 
835   PetscFunctionBegin;
836   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
837   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
842 
843 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
844 {
845   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
846 
847   PetscFunctionBegin;
848 
849   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
854 
855 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
856 {
857   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
858 
859   PetscFunctionBegin;
860 
861   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
862   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
863   PetscFunctionReturn(PETSC_SUCCESS);
864 }
865 
866 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
867 {
868   Mat_Product *product = C->product;
869   Mat          A = product->A, P = product->B;
870   PetscBool    flg;
871 
872   PetscFunctionBegin;
873   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
874   if (flg) {
875     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
876     C->ops->productnumeric = MatProductNumeric_PtAP;
877     PetscFunctionReturn(PETSC_SUCCESS);
878   }
879 
880   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
881   if (flg) {
882     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
883     C->ops->productnumeric = MatProductNumeric_PtAP;
884     PetscFunctionReturn(PETSC_SUCCESS);
885   }
886 
887   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
888 }
889 
890 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
891 {
892   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
893   Mat          a   = b->AIJ, B;
894   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
895   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
896   PetscInt    *cols;
897   PetscScalar *vals;
898 
899   PetscFunctionBegin;
900   PetscCall(MatGetSize(a, &m, &n));
901   PetscCall(PetscMalloc1(dof * m, &ilen));
902   for (i = 0; i < m; i++) {
903     nmax = PetscMax(nmax, aij->ilen[i]);
904     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
905   }
906   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
907   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
908   PetscCall(MatSetType(B, newtype));
909   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
910   PetscCall(PetscFree(ilen));
911   PetscCall(PetscMalloc1(nmax, &icols));
912   ii = 0;
913   for (i = 0; i < m; i++) {
914     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
915     for (j = 0; j < dof; j++) {
916       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
917       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
918       ii++;
919     }
920     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
921   }
922   PetscCall(PetscFree(icols));
923   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
925 
926   if (reuse == MAT_INPLACE_MATRIX) {
927     PetscCall(MatHeaderReplace(A, &B));
928   } else {
929     *newmat = B;
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 #include <../src/mat/impls/aij/mpi/mpiaij.h>
935 
/* Expand a parallel MAIJ matrix into an explicit MPIAIJ matrix: every nonzero
   a(i,k) of the blockwise matrix becomes dof nonzeros B(dof*i+j, dof*k+j),
   j = 0..dof-1, assembled from the local diagonal (AIJ) and off-diagonal
   (OAIJ) sequential parts of the MAIJ matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  /* underlying sequential AIJ matrices of the MAIJ diagonal and off-diagonal parts */
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  /* Preallocation: each of the dof expanded rows derived from blockwise row i has
     exactly as many diagonal/off-diagonal nonzeros as row i itself; nmax/onmax
     track the longest rows so the column work arrays below can be sized once. */
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  /* global row/column offsets of this process's expanded (dof-scaled) block */
  rstart = dof * maij->A->rmap->rstart;
  cstart = dof * maij->A->cmap->rstart;
  /* garray maps local off-diagonal column indices to global blockwise columns */
  garray = mpiaij->garray;

  ii = rstart;
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    /* blockwise row i expands to dof interleaved global rows; component j shifts every column by j */
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* NOTE(review): temporarily forcing refct to 1 presumably lets MatHeaderReplace()
       proceed while other objects still hold references to A; the saved count is
       restored on the replaced header afterwards — confirm against MatHeaderReplace docs */
    PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
    ((PetscObject)A)->refct = 1;

    PetscCall(MatHeaderReplace(A, &B));

    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1002 
1003 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1004 {
1005   Mat A;
1006 
1007   PetscFunctionBegin;
1008   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1009   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1010   PetscCall(MatDestroy(&A));
1011   PetscFunctionReturn(PETSC_SUCCESS);
1012 }
1013 
1014 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1015 {
1016   Mat A;
1017 
1018   PetscFunctionBegin;
1019   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1020   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1021   PetscCall(MatDestroy(&A));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 /* ---------------------------------------------------------------------------------- */
1026 /*@
1027   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1028   operations for multicomponent problems.  It interpolates each component the same
1029   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1030   and `MATMPIAIJ` for distributed matrices.
1031 
1032   Collective
1033 
1034   Input Parameters:
1035 + A - the `MATAIJ` matrix describing the action on blocks
1036 - dof - the block size (number of components per node)
1037 
1038   Output Parameter:
1039 . maij - the new `MATMAIJ` matrix
1040 
1041   Operations provided:
1042 .vb
1043     MatMult()
1044     MatMultTranspose()
1045     MatMultAdd()
1046     MatMultTransposeAdd()
1047     MatView()
1048 .ve
1049 
1050   Level: advanced
1051 
.seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1053 @*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  /* a negative dof only encodes the "skip CUDA conversion" flag above; the block size is |dof| */
  dof = PetscAbs(dof);
  /* the MAIJ matrix keeps a reference to A (for dof == 1 it simply IS A) */
  PetscCall(PetscObjectReference((PetscObject)A));

  if (dof == 1) *maij = A; /* one component per node: the blockwise action is the matrix itself */
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    /* no entries are ever inserted into B; it applies A blockwise, so mark it assembled now */
    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      /* ---- sequential case: wrap A in a MATSEQMAIJ shell ---- */
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      /* dof-specialized (hand-unrolled) kernels for common block sizes; generic _N fallback otherwise */
      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      /* ---- parallel case: wrap the diagonal and off-diagonal parts of the MPIAIJ
         matrix in sequential MAIJ shells and build the communication machinery ---- */
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      /* negative dof suppresses the CUDA conversion inside these nested sequential MAIJ shells */
      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      /* work vector holding the dof-expanded ghost values needed by the off-diagonal part */
      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary index sets used to build the scatter:
         `from` addresses the needed global blocks, `to` their local destinations */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1233