xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 966bd95a39c2334d2e2ce17ad22128f3c1861eeb)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petscdevice.h>
12 #include <petsc/private/deviceimpl.h>
13 
14 /*@
15   MatDenseGetLocalMatrix - For a `MATMPIDENSE` matrix, returns the sequential matrix that stores the
16   process-local portion of the operator. For a `MATSEQDENSE` matrix, it returns the matrix itself.
17 
18   Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20 
21   Output Parameter:
22 . B - the inner sequential matrix
23 
24   Level: intermediate
25 
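  Example Usage:
  A minimal sketch; `M` and `N` are assumed global sizes, not names from this file.
.vb
  Mat A, lA;

  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
  PetscCall(MatDenseGetLocalMatrix(A, &lA)); // lA is the rank-local MATSEQDENSE block
  PetscCall(MatZeroEntries(lA));             // operate directly on the local block
.ve
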
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31   PetscBool     flg;
32 
33   PetscFunctionBegin;
34   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35   PetscAssertPointer(B, 2);
36   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37   if (flg) *B = mat->A;
38   else {
39     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41     *B = A;
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50 
51   PetscFunctionBegin;
52   PetscCall(MatCopy(Amat->A, Bmat->A, s));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57 {
58   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60   PetscScalar  *v;
61 
62   PetscFunctionBegin;
63   PetscCall(MatDenseGetArray(mat->A, &v));
64   PetscCall(MatDenseGetLDA(mat->A, &lda));
65   rend2 = PetscMin(rend, A->cmap->N);
66   if (rend2 > rstart) {
67     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68     PetscCall(PetscLogFlops(rend2 - rstart));
69   }
70   PetscCall(MatDenseRestoreArray(mat->A, &v));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
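
/*
   Illustrative sketch (not part of the original source): in the column-major local
   storage used above, the global diagonal entry (j, j) with rstart <= j < rend lives
   at v[(j - rstart) + j * lda], so the shift touches only locally owned rows:

     PetscCall(MatShift(A, 2.0)); // A <- A + 2.0*I for an assembled MATMPIDENSE A
*/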
73 
74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78 
79   PetscFunctionBegin;
80   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81   lrow = row - rstart;
82   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90 
91   PetscFunctionBegin;
92   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93   lrow = row - rstart;
94   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
99 {
100   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
101   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
102   PetscScalar  *array;
103   MPI_Comm      comm;
104   PetscBool     flg;
105   Mat           B;
106 
107   PetscFunctionBegin;
108   PetscCall(MatHasCongruentLayouts(A, &flg));
109   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
110   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
111   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
112 #if PetscDefined(HAVE_CUDA)
113     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
114     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
115 #elif PetscDefined(HAVE_HIP)
116     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
117     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
118 #endif
119     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
120     PetscCall(MatCreate(comm, &B));
121     PetscCall(MatSetSizes(B, m, m, m, m));
122     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
123     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
124     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); /* assumes the local block uses the default column stride lda == m */
125     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
126     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
127     *a = B;
128     PetscCall(MatDestroy(&B));
129   } else *a = B;
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
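
/*
   Illustrative sketch (assumes A is a square, assembled MATMPIDENSE matrix):

     Mat diag;
     PetscCall(MatGetDiagonalBlock(A, &diag));           // borrowed reference; do not destroy
     PetscCall(MatView(diag, PETSC_VIEWER_STDOUT_SELF)); // view this rank's diagonal block
*/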
132 
133 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
134 {
135   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
136   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
137   PetscBool     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i = 0; i < m; i++) {
141     if (idxm[i] < 0) continue;
142     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
147       } else {
148         for (j = 0; j < n; j++) {
149           if (idxn[j] < 0) continue;
150           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
151           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
152         }
153       }
154     } else if (!A->donotstash) {
155       mat->assembled = PETSC_FALSE;
156       if (roworiented) {
157         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
158       } else {
159         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
160       }
161     }
162   }
163   PetscFunctionReturn(PETSC_SUCCESS);
164 }
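
/*
   Illustrative sketch (hypothetical indices and values): entries destined for other
   ranks are stashed above and only communicated during assembly, so a typical
   insertion sequence is

     PetscInt    row    = 0, cols[] = {0, 1};
     PetscScalar vals[] = {1.0, 2.0};
     PetscCall(MatSetValues(A, 1, &row, 2, cols, vals, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/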
165 
166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167 {
168   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
170 
171   PetscFunctionBegin;
172   for (i = 0; i < m; i++) {
173     if (idxm[i] < 0) continue; /* negative row */
174     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
176     row = idxm[i] - rstart;
177     for (j = 0; j < n; j++) {
178       if (idxn[j] < 0) continue; /* negative column */
179       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180       PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181     }
182   }
183   PetscFunctionReturn(PETSC_SUCCESS);
184 }
185 
186 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
187 {
188   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
189 
190   PetscFunctionBegin;
191   PetscCall(MatDenseGetLDA(a->A, lda));
192   PetscFunctionReturn(PETSC_SUCCESS);
193 }
194 
195 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
196 {
197   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
198   MatType       mtype = MATSEQDENSE;
199 
200   PetscFunctionBegin;
201   if (!a->A) {
202     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
203     PetscCall(PetscLayoutSetUp(A->rmap));
204     PetscCall(PetscLayoutSetUp(A->cmap));
205     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
206     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
207 #if PetscDefined(HAVE_CUDA)
208     PetscBool iscuda;
209     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
210     if (iscuda) mtype = MATSEQDENSECUDA;
211 #elif PetscDefined(HAVE_HIP)
212     PetscBool iship;
213     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
214     if (iship) mtype = MATSEQDENSEHIP;
215 #endif
216     PetscCall(MatSetType(a->A, mtype));
217   }
218   PetscCall(MatDenseSetLDA(a->A, lda));
219   PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 
222 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
223 {
224   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
225 
226   PetscFunctionBegin;
227   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
228   PetscCall(MatDenseGetArray(a->A, array));
229   PetscFunctionReturn(PETSC_SUCCESS);
230 }
231 
232 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
233 {
234   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
235 
236   PetscFunctionBegin;
237   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
238   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
242 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
243 {
244   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
245 
246   PetscFunctionBegin;
247   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
248   PetscCall(MatDenseGetArrayWrite(a->A, array));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
253 {
254   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
255 
256   PetscFunctionBegin;
257   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
258   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
259   PetscCall(MatDensePlaceArray(a->A, array));
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
264 {
265   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
266 
267   PetscFunctionBegin;
268   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
269   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
270   PetscCall(MatDenseResetArray(a->A));
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
275 {
276   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
277 
278   PetscFunctionBegin;
279   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
280   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
281   PetscCall(MatDenseReplaceArray(a->A, array));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
285 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
286 {
287   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
288   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
289   const PetscInt    *irow, *icol;
290   const PetscScalar *v;
291   PetscScalar       *bv;
292   Mat                newmat;
293   IS                 iscol_local;
294   MPI_Comm           comm_is, comm_mat;
295 
296   PetscFunctionBegin;
297   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
298   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
299   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
300 
301   PetscCall(ISAllGather(iscol, &iscol_local));
302   PetscCall(ISGetIndices(isrow, &irow));
303   PetscCall(ISGetIndices(iscol_local, &icol));
304   PetscCall(ISGetLocalSize(isrow, &nrows));
305   PetscCall(ISGetLocalSize(iscol, &ncols));
306   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
307 
308   /* No parallel redistribution is currently supported: the submatrix must use the same
309      row partitioning as the original matrix. Each index set should really be checked to
310      confirm that it satisfies this restriction. */
311 
312   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
313   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
314 
315   /* Check submatrix call */
316   if (scall == MAT_REUSE_MATRIX) {
317     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
318     /* Really need to test the row and column sizes! */
319     newmat = *B;
320   } else {
321     /* Create and fill new matrix */
322     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
323     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
324     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
325     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
326   }
327 
328   /* Now extract the data pointers and do the copy, column at a time */
329   newmatd = (Mat_MPIDense *)newmat->data;
330   PetscCall(MatDenseGetArray(newmatd->A, &bv));
331   PetscCall(MatDenseGetArrayRead(mat->A, &v));
332   PetscCall(MatDenseGetLDA(mat->A, &lda));
333   for (i = 0; i < Ncols; i++) {
334     const PetscScalar *av = v + lda * icol[i];
335     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
336   }
337   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
338   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
339 
340   /* Assemble the matrices so that the correct flags are set */
341   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
342   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
343 
344   /* Free work space */
345   PetscCall(ISRestoreIndices(isrow, &irow));
346   PetscCall(ISRestoreIndices(iscol_local, &icol));
347   PetscCall(ISDestroy(&iscol_local));
348   *B = newmat;
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
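
/*
   Illustrative sketch (assumes A is an assembled MATMPIDENSE; nloc and rstart are this
   rank's local row count and first global row, and cloc/cstart its share of columns):

     IS  isrow, iscol;
     Mat B;
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, nloc, rstart, 1, &isrow)); // keep A's row partitioning
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, cloc, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &B));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&B));
*/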
351 
352 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
353 {
354   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
355 
356   PetscFunctionBegin;
357   PetscCall(MatDenseRestoreArray(a->A, array));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
362 {
363   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
364 
365   PetscFunctionBegin;
366   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
367   PetscFunctionReturn(PETSC_SUCCESS);
368 }
369 
370 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
371 {
372   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
373 
374   PetscFunctionBegin;
375   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
376   PetscFunctionReturn(PETSC_SUCCESS);
377 }
378 
379 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
380 {
381   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
382   PetscInt      nstash, reallocs;
383 
384   PetscFunctionBegin;
385   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
386 
387   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
388   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
389   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
390   PetscFunctionReturn(PETSC_SUCCESS);
391 }
392 
393 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
394 {
395   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
396   PetscInt      i, *row, *col, flg, j, rstart, ncols;
397   PetscMPIInt   n;
398   PetscScalar  *val;
399 
400   PetscFunctionBegin;
401   if (!mdn->donotstash && !mat->nooffprocentries) {
402     /*  wait on receives */
403     while (1) {
404       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
405       if (!flg) break;
406 
407       for (i = 0; i < n;) {
408         /* Now identify the consecutive vals belonging to the same row */
409         for (j = i, rstart = row[j]; j < n; j++) {
410           if (row[j] != rstart) break;
411         }
412         if (j < n) ncols = j - i;
413         else ncols = n - i;
414         /* Now assemble all these values with a single function call */
415         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
416         i = j;
417       }
418     }
419     PetscCall(MatStashScatterEnd_Private(&mat->stash));
420   }
421 
422   PetscCall(MatAssemblyBegin(mdn->A, mode));
423   PetscCall(MatAssemblyEnd(mdn->A, mode));
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
427 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
428 {
429   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
430 
431   PetscFunctionBegin;
432   PetscCall(MatZeroEntries(l->A));
433   PetscFunctionReturn(PETSC_SUCCESS);
434 }
435 
436 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
437 {
438   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
439   PetscInt      i, len, *lrows;
440 
441   PetscFunctionBegin;
442   /* get locally owned rows */
443   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
444   /* fix right-hand side if needed */
445   if (x && b) {
446     const PetscScalar *xx;
447     PetscScalar       *bb;
448 
449     PetscCall(VecGetArrayRead(x, &xx));
450     PetscCall(VecGetArrayWrite(b, &bb));
451     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
452     PetscCall(VecRestoreArrayRead(x, &xx));
453     PetscCall(VecRestoreArrayWrite(b, &bb));
454   }
455   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
456   if (diag != 0.0) {
457     Vec d;
458 
459     PetscCall(MatCreateVecs(A, NULL, &d));
460     PetscCall(VecSet(d, diag));
461     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
462     PetscCall(VecDestroy(&d));
463   }
464   PetscCall(PetscFree(lrows));
465   PetscFunctionReturn(PETSC_SUCCESS);
466 }
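
/*
   Illustrative sketch (hypothetical row indices; x and b are assumed solution and
   right-hand-side vectors): zero global rows 0 and 5, place 1.0 on their diagonal,
   and adjust b so that b[i] = 1.0 * x[i] for the zeroed rows:

     const PetscInt rows[] = {0, 5};
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/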
467 
468 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
472 
473 static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
474 {
475   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
476   const PetscScalar *ax;
477   PetscScalar       *ay;
478   PetscMemType       axmtype, aymtype;
479 
480   PetscFunctionBegin;
481   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
482   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
483   PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
484   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
485   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
486   PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
487   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
488   PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
489   PetscFunctionReturn(PETSC_SUCCESS);
490 }
491 
492 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
493 {
494   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
495   const PetscScalar *ax;
496   PetscScalar       *ay;
497   PetscMemType       axmtype, aymtype;
498 
499   PetscFunctionBegin;
500   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
501   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
502   PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
503   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
504   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
505   PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
506   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
507   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
508   PetscFunctionReturn(PETSC_SUCCESS);
509 }
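
/*
   Illustrative sketch (not part of the original source): because each local block is
   (local rows) x N, y = A*x first broadcasts the distributed x into the work vector
   lvec through the PetscSF, then applies the sequential kernel:

     Vec x, y;
     PetscCall(MatCreateVecs(A, &x, &y)); // x in the column layout, y in the row layout
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(A, x, y));         // y now holds the row sums of A
*/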
510 
511 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
512 {
513   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
514   const PetscScalar *ax;
515   PetscScalar       *ay;
516   PetscMemType       axmtype, aymtype;
517 
518   PetscFunctionBegin;
519   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
520   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
521   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
522   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
523   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
524   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
525   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
526   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
531 {
532   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
533   const PetscScalar *ax;
534   PetscScalar       *ay;
535   PetscMemType       axmtype, aymtype;
536 
537   PetscFunctionBegin;
538   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
539   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
540   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
541   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
542   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
543   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
544   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
545   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   const PetscScalar *ax;
553   PetscScalar       *ay;
554   PetscMemType       axmtype, aymtype;
555   PetscInt           r_start, r_end;
556   PetscInt           c_start_local, c_end_local;
557 
558   PetscFunctionBegin;
559   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
560   PetscCall(VecZeroEntries(a->lvec));
561   PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
562   c_start_local = PetscMax(c_start, r_start);
563   c_end_local   = PetscMin(c_end, r_end);
564   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
565   if (c_end_local > c_start_local) {
566     if (PetscMemTypeHost(aymtype)) {
567       PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
568     } else {
569       PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
570       PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
571     }
572   }
573   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
574   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
575   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
576   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
577   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
578   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
582 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
583 {
584   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
585   const PetscScalar *ax;
586   PetscScalar       *ay;
587   PetscMemType       axmtype, aymtype;
588 
589   PetscFunctionBegin;
590   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
591   PetscCall(VecSet(yy, 0.0));
592   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
593   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
594   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
595   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
596   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
597   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
598   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
599   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
600   PetscFunctionReturn(PETSC_SUCCESS);
601 }
602 
603 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
604 {
605   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
606   const PetscScalar *ax;
607   PetscScalar       *ay;
608   PetscMemType       axmtype, aymtype;
609 
610   PetscFunctionBegin;
611   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
612   PetscCall(VecCopy(yy, zz));
613   PetscCall(VecZeroEntries(a->lvec));
614   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
615   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
616   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
617   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
618   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
619   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
620   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
621   PetscFunctionReturn(PETSC_SUCCESS);
622 }
623 
624 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
625 {
626   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
627   const PetscScalar *ax;
628   PetscScalar       *ay;
629   PetscMemType       axmtype, aymtype;
630 
631   PetscFunctionBegin;
632   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
633   PetscCall(VecCopy(yy, zz));
634   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
635   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
636   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
637   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
638   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
639   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
640   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
641   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
642   PetscFunctionReturn(PETSC_SUCCESS);
643 }
644 
645 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
646 {
647   PetscFunctionBegin;
648   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
653 {
654   PetscFunctionBegin;
655   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
660 {
661   PetscFunctionBegin;
662   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
666 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
667 {
668   PetscFunctionBegin;
669   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
670   PetscFunctionReturn(PETSC_SUCCESS);
671 }
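
/*
   Illustrative sketch (y and xt are hypothetical vectors in the row and column layouts
   of A): each rank applies the (Hermitian) transpose of its local block and the partial
   results are summed across ranks by the PetscSF reduce in the kernels above, e.g.

     PetscCall(MatMultTranspose(A, y, xt));          // xt = A^T y
     PetscCall(MatMultHermitianTranspose(A, y, xt)); // xt = A^H y
*/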
672 
673 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
674 {
675   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
676   PetscInt           lda, len, i, nl, ng, radd;
677   PetscScalar       *x;
678   const PetscScalar *av;
679 
680   PetscFunctionBegin;
681   PetscCall(VecGetArray(v, &x));
682   PetscCall(VecGetSize(v, &ng));
683   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
684   PetscCall(VecGetLocalSize(v, &nl));
685   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
686   PetscCall(MatDenseGetArrayRead(a->A, &av));
687   PetscCall(MatDenseGetLDA(a->A, &lda));
688   radd = A->rmap->rstart * lda; /* column offset of this rank's first diagonal entry */
689   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
690   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
691   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
692   PetscCall(VecRestoreArray(v, &x));
693   PetscFunctionReturn(PETSC_SUCCESS);
694 }
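
/*
   Illustrative sketch: the diagonal lands in a vector with the row layout of A, which
   MatCreateVecs() supplies as the "left" vector:

     Vec d;
     PetscCall(MatCreateVecs(A, NULL, &d));
     PetscCall(MatGetDiagonal(A, d));
     PetscCall(VecDestroy(&d));
*/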
695 
696 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
697 {
698   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
699 
700   PetscFunctionBegin;
701   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
702   PetscCall(MatStashDestroy_Private(&mat->stash));
703   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
704   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
705   PetscCall(MatDestroy(&mdn->A));
706   PetscCall(VecDestroy(&mdn->lvec));
707   PetscCall(PetscSFDestroy(&mdn->Mvctx));
708   PetscCall(VecDestroy(&mdn->cvec));
709   PetscCall(MatDestroy(&mdn->cmat));
710 
711   PetscCall(PetscFree(mat->data));
712   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
713 
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
723   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
724   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
727 #if defined(PETSC_HAVE_ELEMENTAL)
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
729 #endif
730 #if defined(PETSC_HAVE_SCALAPACK)
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
732 #endif
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
736 #if defined(PETSC_HAVE_CUDA)
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
755 #endif
756 #if defined(PETSC_HAVE_HIP)
757   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
759   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
760   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
764   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
765   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
769   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
770   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
774   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
775 #endif
776   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
777   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
778   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
779   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
780   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
781   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
782   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
783   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
784   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
785   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
786   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
787   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
788   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
789   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
790 
791   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
792   PetscFunctionReturn(PETSC_SUCCESS);
793 }
794 
795 #include <petscdraw.h>
796 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
797 {
798   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
799   PetscMPIInt       rank;
800   PetscViewerType   vtype;
801   PetscBool         isascii, isdraw;
802   PetscViewer       sviewer;
803   PetscViewerFormat format;
804 
805   PetscFunctionBegin;
806   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
807   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
808   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
809   if (isascii) {
810     PetscCall(PetscViewerGetType(viewer, &vtype));
811     PetscCall(PetscViewerGetFormat(viewer, &format));
812     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
813       MatInfo info;
814       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
815       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
816       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
817                                                    (PetscInt)info.memory));
818       PetscCall(PetscViewerFlush(viewer));
819       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
820       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
821       PetscFunctionReturn(PETSC_SUCCESS);
822     } else if (format == PETSC_VIEWER_ASCII_INFO) {
823       PetscFunctionReturn(PETSC_SUCCESS);
824     }
825   } else if (isdraw) {
826     PetscDraw draw;
827     PetscBool isnull;
828 
829     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
830     PetscCall(PetscDrawIsNull(draw, &isnull));
831     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
832   }
833 
834   {
835     /* assemble the entire matrix onto the first process */
836     Mat          A;
837     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
838     PetscInt    *cols;
839     PetscScalar *vals;
840 
841     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
842     if (rank == 0) {
843       PetscCall(MatSetSizes(A, M, N, M, N));
844     } else {
845       PetscCall(MatSetSizes(A, 0, 0, M, N));
846     }
847     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
848     PetscCall(MatSetType(A, MATMPIDENSE));
849     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
850 
851     /* Copy the matrix entry by entry. This isn't the most efficient approach,
852        but it is simple */
853     A->insertmode = INSERT_VALUES;
854 
855     row = mat->rmap->rstart;
856     m   = mdn->A->rmap->n;
857     for (i = 0; i < m; i++) {
858       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
859       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
860       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
861       row++;
862     }
863 
864     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
865     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
866     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
867     if (rank == 0) {
868       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
869       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
870     }
871     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
872     PetscCall(MatDestroy(&A));
873   }
874   PetscFunctionReturn(PETSC_SUCCESS);
875 }
876 
877 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
878 {
879   PetscBool isascii, isbinary, isdraw, issocket;
880 
881   PetscFunctionBegin;
882   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
883   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
884   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
885   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
886 
887   if (isascii || issocket || isdraw) {
888     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
889   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
894 {
895   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
896   Mat            mdn = mat->A;
897   PetscLogDouble isend[5], irecv[5];
898 
899   PetscFunctionBegin;
900   info->block_size = 1.0;
901 
902   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
903 
904   isend[0] = info->nz_used;
905   isend[1] = info->nz_allocated;
906   isend[2] = info->nz_unneeded;
907   isend[3] = info->memory;
908   isend[4] = info->mallocs;
909   if (flag == MAT_LOCAL) {
910     info->nz_used      = isend[0];
911     info->nz_allocated = isend[1];
912     info->nz_unneeded  = isend[2];
913     info->memory       = isend[3];
914     info->mallocs      = isend[4];
915   } else if (flag == MAT_GLOBAL_MAX) {
916     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
917 
918     info->nz_used      = irecv[0];
919     info->nz_allocated = irecv[1];
920     info->nz_unneeded  = irecv[2];
921     info->memory       = irecv[3];
922     info->mallocs      = irecv[4];
923   } else if (flag == MAT_GLOBAL_SUM) {
924     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
925 
926     info->nz_used      = irecv[0];
927     info->nz_allocated = irecv[1];
928     info->nz_unneeded  = irecv[2];
929     info->memory       = irecv[3];
930     info->mallocs      = irecv[4];
931   }
932   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
933   info->fill_ratio_needed = 0;
934   info->factor_mallocs    = 0;
935   PetscFunctionReturn(PETSC_SUCCESS);
936 }
937 
938 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
939 {
940   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
941 
942   PetscFunctionBegin;
943   switch (op) {
944   case MAT_NEW_NONZERO_LOCATIONS:
945   case MAT_NEW_NONZERO_LOCATION_ERR:
946   case MAT_NEW_NONZERO_ALLOCATION_ERR:
947     MatCheckPreallocated(A, 1);
948     PetscCall(MatSetOption(a->A, op, flg));
949     break;
950   case MAT_ROW_ORIENTED:
951     MatCheckPreallocated(A, 1);
952     a->roworiented = flg;
953     PetscCall(MatSetOption(a->A, op, flg));
954     break;
955   case MAT_IGNORE_OFF_PROC_ENTRIES:
956     a->donotstash = flg;
957     break;
958   case MAT_SYMMETRIC:
959   case MAT_STRUCTURALLY_SYMMETRIC:
960   case MAT_HERMITIAN:
961   case MAT_SYMMETRY_ETERNAL:
962   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
963   case MAT_SPD:
964   case MAT_SPD_ETERNAL:
965     /* if the local diagonal block is square it inherits some of the properties above */
966     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
967     break;
968   default:
969     break;
970   }
971   PetscFunctionReturn(PETSC_SUCCESS);
972 }
973 
974 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
975 {
976   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
977   const PetscScalar *l;
978   PetscScalar        x, *v, *vv, *r;
979   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
980 
981   PetscFunctionBegin;
982   PetscCall(MatDenseGetArray(mdn->A, &vv));
983   PetscCall(MatDenseGetLDA(mdn->A, &lda));
984   PetscCall(MatGetLocalSize(A, &s2, &s3));
985   if (ll) {
986     PetscCall(VecGetLocalSize(ll, &s2a));
987     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
988     PetscCall(VecGetArrayRead(ll, &l));
989     for (i = 0; i < m; i++) {
990       x = l[i];
991       v = vv + i;
992       for (j = 0; j < n; j++) {
993         (*v) *= x;
994         v += lda;
995       }
996     }
997     PetscCall(VecRestoreArrayRead(ll, &l));
998     PetscCall(PetscLogFlops(1.0 * n * m));
999   }
1000   if (rr) {
1001     const PetscScalar *ar;
1002 
1003     PetscCall(VecGetLocalSize(rr, &s3a));
1004     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
1005     PetscCall(VecGetArrayRead(rr, &ar));
1006     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
1007     PetscCall(VecGetArray(mdn->lvec, &r));
1008     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1009     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1010     PetscCall(VecRestoreArrayRead(rr, &ar));
1011     for (i = 0; i < n; i++) {
1012       x = r[i];
1013       v = vv + i * lda;
1014       for (j = 0; j < m; j++) (*v++) *= x;
1015     }
1016     PetscCall(VecRestoreArray(mdn->lvec, &r));
1017     PetscCall(PetscLogFlops(1.0 * n * m));
1018   }
1019   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
1020   PetscFunctionReturn(PETSC_SUCCESS);
1021 }
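
/*
   Illustrative sketch (l and r are hypothetical vectors in the row and column layouts
   of A): scales A in place to diag(l) * A * diag(r); pass NULL to skip either side:

     PetscCall(MatDiagonalScale(A, l, r));
*/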
1022 
1023 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1024 {
1025   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1026   PetscInt           i, j;
1027   PetscMPIInt        size;
1028   PetscReal          sum = 0.0;
1029   const PetscScalar *av, *v;
1030 
1031   PetscFunctionBegin;
1032   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1033   v = av;
1034   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1035   if (size == 1) {
1036     PetscCall(MatNorm(mdn->A, type, nrm));
1037   } else {
1038     if (type == NORM_FROBENIUS) {
1039       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1040         sum += PetscRealPart(PetscConj(*v) * (*v));
1041         v++;
1042       }
1043       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1044       *nrm = PetscSqrtReal(*nrm);
1045       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1046     } else if (type == NORM_1) {
1047       PetscReal *tmp;
1048 
1049       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1050       *nrm = 0.0;
1051       v    = av;
1052       for (j = 0; j < mdn->A->cmap->n; j++) {
1053         for (i = 0; i < mdn->A->rmap->n; i++) {
1054           tmp[j] += PetscAbsScalar(*v);
1055           v++;
1056         }
1057       }
1058       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1059       for (j = 0; j < A->cmap->N; j++) {
1060         if (tmp[j] > *nrm) *nrm = tmp[j];
1061       }
1062       PetscCall(PetscFree(tmp));
1063       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1064     } else if (type == NORM_INFINITY) { /* max row norm */
1065       PetscCall(MatNorm(mdn->A, type, nrm));
1066       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1067     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1068   }
1069   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1070   PetscFunctionReturn(PETSC_SUCCESS);
1071 }
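
/*
   Illustrative sketch: each supported norm reduces local contributions across ranks as
   above; for example the Frobenius norm sums local squares before one global reduction:

     PetscReal nrm;
     PetscCall(MatNorm(A, NORM_FROBENIUS, &nrm));
*/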
1072 
1073 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1074 {
1075   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1076   Mat           B;
1077   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1078   PetscInt      j, i, lda;
1079   PetscScalar  *v;
1080 
1081   PetscFunctionBegin;
1082   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1083   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1084     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1085     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1086     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1087     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1088   } else B = *matout;
1089 
1090   m = a->A->rmap->n;
1091   n = a->A->cmap->n;
1092   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1093   PetscCall(MatDenseGetLDA(a->A, &lda));
1094   PetscCall(PetscMalloc1(m, &rwork));
1095   for (i = 0; i < m; i++) rwork[i] = rstart + i;
1096   for (j = 0; j < n; j++) {
1097     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1098     v = PetscSafePointerPlusOffset(v, lda);
1099   }
1100   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1101   PetscCall(PetscFree(rwork));
1102   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1103   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1104   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1105     *matout = B;
1106   } else {
1107     PetscCall(MatHeaderMerge(A, &B));
1108   }
1109   PetscFunctionReturn(PETSC_SUCCESS);
1110 }
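
/*
   Illustrative sketch: build an explicit parallel transpose once, then reuse its
   storage when the values of A change:

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); // At = A^T
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));   // refresh At after A changes
     PetscCall(MatDestroy(&At));
*/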
1111 
1112 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1113 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1114 
1115 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1116 {
1117   PetscFunctionBegin;
1118   PetscCall(PetscLayoutSetUp(A->rmap));
1119   PetscCall(PetscLayoutSetUp(A->cmap));
1120   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1121   PetscFunctionReturn(PETSC_SUCCESS);
1122 }
1123 
1124 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1125 {
1126   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1127 
1128   PetscFunctionBegin;
1129   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1130   PetscFunctionReturn(PETSC_SUCCESS);
1131 }
1132 
1133 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1134 {
1135   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1136 
1137   PetscFunctionBegin;
1138   PetscCall(MatConjugate(a->A));
1139   PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141 
1142 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1143 {
1144   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCall(MatRealPart(a->A));
1148   PetscFunctionReturn(PETSC_SUCCESS);
1149 }
1150 
1151 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1152 {
1153   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1154 
1155   PetscFunctionBegin;
1156   PetscCall(MatImaginaryPart(a->A));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1161 {
1162   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1163 
1164   PetscFunctionBegin;
1165   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1166   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1167   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1168   PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170 
1171 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1172 
1173 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1174 {
1175   PetscInt      i, m, n;
1176   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatGetSize(A, &m, &n));
1180   if (type == REDUCTION_MEAN_REALPART) {
1181     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1182   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1183     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1184   } else {
1185     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1186   }
1187   if (type == NORM_2) {
1188     for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1189   }
1190   if (type == NORM_INFINITY) {
1191     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1192   } else {
1193     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1194   }
1195   if (type == NORM_2) {
1196     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1197   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1198     for (i = 0; i < n; i++) reductions[i] /= m;
1199   }
1200   PetscFunctionReturn(PETSC_SUCCESS);
1201 }
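
/*
   Illustrative sketch (norms is a caller-provided PetscReal array with one entry per
   global column): the public entry point for these reductions is MatGetColumnNorms():

     PetscCall(MatGetColumnNorms(A, NORM_2, norms)); // 2-norm of every column of A
*/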
1202 
1203 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1204 {
1205   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1206 
1207   PetscFunctionBegin;
1208   PetscCall(MatSetRandom(d->A, rctx));
1209 #if defined(PETSC_HAVE_DEVICE)
1210   x->offloadmask = d->A->offloadmask;
1211 #endif
1212   PetscFunctionReturn(PETSC_SUCCESS);
1213 }
1214 
1215 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1216 {
1217   PetscFunctionBegin;
1218   *missing = PETSC_FALSE;
1219   PetscFunctionReturn(PETSC_SUCCESS);
1220 }
1221 
1222 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1223 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1224 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1225 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1226 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1227 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1228 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1229 
1230 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1231                                        MatGetRow_MPIDense,
1232                                        MatRestoreRow_MPIDense,
1233                                        MatMult_MPIDense,
1234                                        /*  4*/ MatMultAdd_MPIDense,
1235                                        MatMultTranspose_MPIDense,
1236                                        MatMultTransposeAdd_MPIDense,
1237                                        NULL,
1238                                        NULL,
1239                                        NULL,
1240                                        /* 10*/ NULL,
1241                                        NULL,
1242                                        NULL,
1243                                        NULL,
1244                                        MatTranspose_MPIDense,
1245                                        /* 15*/ MatGetInfo_MPIDense,
1246                                        MatEqual_MPIDense,
1247                                        MatGetDiagonal_MPIDense,
1248                                        MatDiagonalScale_MPIDense,
1249                                        MatNorm_MPIDense,
1250                                        /* 20*/ MatAssemblyBegin_MPIDense,
1251                                        MatAssemblyEnd_MPIDense,
1252                                        MatSetOption_MPIDense,
1253                                        MatZeroEntries_MPIDense,
1254                                        /* 24*/ MatZeroRows_MPIDense,
1255                                        NULL,
1256                                        NULL,
1257                                        NULL,
1258                                        NULL,
1259                                        /* 29*/ MatSetUp_MPIDense,
1260                                        NULL,
1261                                        NULL,
1262                                        MatGetDiagonalBlock_MPIDense,
1263                                        NULL,
1264                                        /* 34*/ MatDuplicate_MPIDense,
1265                                        NULL,
1266                                        NULL,
1267                                        NULL,
1268                                        NULL,
1269                                        /* 39*/ MatAXPY_MPIDense,
1270                                        MatCreateSubMatrices_MPIDense,
1271                                        NULL,
1272                                        MatGetValues_MPIDense,
1273                                        MatCopy_MPIDense,
1274                                        /* 44*/ NULL,
1275                                        MatScale_MPIDense,
1276                                        MatShift_MPIDense,
1277                                        NULL,
1278                                        NULL,
1279                                        /* 49*/ MatSetRandom_MPIDense,
1280                                        NULL,
1281                                        NULL,
1282                                        NULL,
1283                                        NULL,
1284                                        /* 54*/ NULL,
1285                                        NULL,
1286                                        NULL,
1287                                        NULL,
1288                                        NULL,
1289                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1290                                        MatDestroy_MPIDense,
1291                                        MatView_MPIDense,
1292                                        NULL,
1293                                        NULL,
1294                                        /* 64*/ NULL,
1295                                        NULL,
1296                                        NULL,
1297                                        NULL,
1298                                        NULL,
1299                                        /* 69*/ NULL,
1300                                        NULL,
1301                                        NULL,
1302                                        NULL,
1303                                        NULL,
1304                                        /* 74*/ NULL,
1305                                        NULL,
1306                                        NULL,
1307                                        NULL,
1308                                        MatLoad_MPIDense,
1309                                        /* 79*/ NULL,
1310                                        NULL,
1311                                        NULL,
1312                                        NULL,
1313                                        /* 83*/ NULL,
1314                                        NULL,
1315                                        NULL,
1316                                        NULL,
1317                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1318                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1319                                        /* 89*/ NULL,
1320                                        MatProductSetFromOptions_MPIDense,
1321                                        NULL,
1322                                        NULL,
1323                                        MatConjugate_MPIDense,
1324                                        /* 94*/ NULL,
1325                                        NULL,
1326                                        MatRealPart_MPIDense,
1327                                        MatImaginaryPart_MPIDense,
1328                                        NULL,
1329                                        /*99*/ NULL,
1330                                        NULL,
1331                                        NULL,
1332                                        NULL,
1333                                        MatGetColumnVector_MPIDense,
1334                                        /*104*/ MatMissingDiagonal_MPIDense,
1335                                        NULL,
1336                                        NULL,
1337                                        NULL,
1338                                        NULL,
1339                                        /*109*/ NULL,
1340                                        NULL,
1341                                        NULL,
1342                                        MatMultHermitianTranspose_MPIDense,
1343                                        MatMultHermitianTransposeAdd_MPIDense,
1344                                        /*114*/ NULL,
1345                                        NULL,
1346                                        MatGetColumnReductions_MPIDense,
1347                                        NULL,
1348                                        NULL,
1349                                        NULL,
1350                                        /*120*/ NULL,
1351                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1352                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1353                                        NULL,
1354                                        /*124*/ NULL,
1355                                        NULL,
1356                                        NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        /*129*/ NULL,
1360                                        NULL,
1361                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1362                                        NULL,
1363                                        NULL,
1364                                        /*134*/ NULL,
1365                                        NULL,
1366                                        NULL,
1367                                        NULL,
1368                                        NULL,
1369                                        /*139*/ NULL,
1370                                        NULL,
1371                                        NULL,
1372                                        NULL,
1373                                        NULL};
1374 
1375 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1376 {
1377   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1378   MatType       mtype = MATSEQDENSE;
1379 
1380   PetscFunctionBegin;
1381   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1382   PetscCall(PetscLayoutSetUp(mat->rmap));
1383   PetscCall(PetscLayoutSetUp(mat->cmap));
1384   if (!a->A) {
1385     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1386     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1387   }
1388 #if defined(PETSC_HAVE_CUDA)
1389   PetscBool iscuda;
1390   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1391   if (iscuda) mtype = MATSEQDENSECUDA;
1392 #endif
1393 #if defined(PETSC_HAVE_HIP)
1394   PetscBool iship;
1395   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1396   if (iship) mtype = MATSEQDENSEHIP;
1397 #endif
1398   PetscCall(MatSetType(a->A, mtype));
1399   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1400 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1401   mat->offloadmask = a->A->offloadmask;
1402 #endif
1403   mat->preallocated = PETSC_TRUE;
1404   mat->assembled    = PETSC_TRUE;
1405   PetscFunctionReturn(PETSC_SUCCESS);
1406 }
1407 
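/*
  Conversion strategy: merge each rank's diagonal and off-diagonal AIJ blocks into a single
  sequential AIJ matrix over the global column space, convert that block to sequential dense,
  and concatenate the per-rank blocks into the distributed result.
*/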
1408 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1409 {
1410   Mat B, C;
1411 
1412   PetscFunctionBegin;
1413   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1414   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1415   PetscCall(MatDestroy(&C));
1416   if (reuse == MAT_REUSE_MATRIX) {
1417     C = *newmat;
1418   } else C = NULL;
1419   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1420   PetscCall(MatDestroy(&B));
1421   if (reuse == MAT_INPLACE_MATRIX) {
1422     PetscCall(MatHeaderReplace(A, &C));
1423   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1428 {
1429   Mat B, C;
1430 
1431   PetscFunctionBegin;
1432   PetscCall(MatDenseGetLocalMatrix(A, &C));
1433   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1434   if (reuse == MAT_REUSE_MATRIX) {
1435     C = *newmat;
1436   } else C = NULL;
1437   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1438   PetscCall(MatDestroy(&B));
1439   if (reuse == MAT_INPLACE_MATRIX) {
1440     PetscCall(MatHeaderReplace(A, &C));
1441   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 }
1444 
1445 #if defined(PETSC_HAVE_ELEMENTAL)
1446 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1447 {
1448   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1449   Mat           mat_elemental;
1450   PetscScalar  *v;
1451   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1452 
1453   PetscFunctionBegin;
1454   if (reuse == MAT_REUSE_MATRIX) {
1455     mat_elemental = *newmat;
1456     PetscCall(MatZeroEntries(*newmat));
1457   } else {
1458     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1459     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1460     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1461     PetscCall(MatSetUp(mat_elemental));
1462     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1463   }
1464 
1465   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1466   for (i = 0; i < N; i++) cols[i] = i;
1467   for (i = 0; i < m; i++) rows[i] = rstart + i;
1468 
1469   /* the PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1470   PetscCall(MatDenseGetArray(A, &v));
1471   PetscCall(MatDenseGetLDA(a->A, &lda));
1472   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1473   else {
1474     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1475   }
1476   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1477   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1478   PetscCall(MatDenseRestoreArray(A, &v));
1479   PetscCall(PetscFree2(rows, cols));
1480 
1481   if (reuse == MAT_INPLACE_MATRIX) {
1482     PetscCall(MatHeaderReplace(A, &mat_elemental));
1483   } else {
1484     *newmat = mat_elemental;
1485   }
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 #endif
1489 
1490 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1491 {
1492   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1493 
1494   PetscFunctionBegin;
1495   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1500 {
1501   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1502 
1503   PetscFunctionBegin;
1504   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1505   PetscFunctionReturn(PETSC_SUCCESS);
1506 }
1507 
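/*
  Stacks the per-rank sequential matrices into one distributed dense matrix: each rank
  contributes the rows of its inmat, every rank must pass the same number of columns, and n
  describes the column layout of the result (the local column counts must sum to N).
*/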
1508 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1509 {
1510   Mat_MPIDense *mat;
1511   PetscInt      m, nloc, N;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatGetSize(inmat, &m, &N));
1515   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1516   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1517     PetscInt sum;
1518 
1519     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1520     /* Check sum(n) = N */
1521     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1522     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1523 
1524     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1525     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1526   }
1527 
1528   /* numeric phase */
1529   mat = (Mat_MPIDense *)(*outmat)->data;
1530   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1531   PetscFunctionReturn(PETSC_SUCCESS);
1532 }
1533 
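/*
  The column vectors handed out below are all backed by a single cached Vec (a->cvec):
  VecPlaceArray() points it at offset col*lda of the local dense storage. Consequently only
  one column vector may be outstanding at a time (tracked by a->vecinuse), and it must be
  returned with the matching restore routine before another column is requested.
*/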
1534 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1535 {
1536   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1537   PetscInt      lda;
1538 
1539   PetscFunctionBegin;
1540   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1541   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1542   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1543   a->vecinuse = col + 1;
1544   PetscCall(MatDenseGetLDA(a->A, &lda));
1545   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1546   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1547   *v = a->cvec;
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1552 {
1553   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1554 
1555   PetscFunctionBegin;
1556   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1557   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1558   VecCheckAssembled(a->cvec);
1559   a->vecinuse = 0;
1560   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1561   PetscCall(VecResetArray(a->cvec));
1562   if (v) *v = NULL;
1563   PetscFunctionReturn(PETSC_SUCCESS);
1564 }
1565 
1566 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1567 {
1568   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1569   PetscInt      lda;
1570 
1571   PetscFunctionBegin;
1572   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1573   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1574   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1575   a->vecinuse = col + 1;
1576   PetscCall(MatDenseGetLDA(a->A, &lda));
1577   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1578   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1579   PetscCall(VecLockReadPush(a->cvec));
1580   *v = a->cvec;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1585 {
1586   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1587 
1588   PetscFunctionBegin;
1589   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecRead() first");
1590   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1591   VecCheckAssembled(a->cvec);
1592   a->vecinuse = 0;
1593   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1594   PetscCall(VecLockReadPop(a->cvec));
1595   PetscCall(VecResetArray(a->cvec));
1596   if (v) *v = NULL;
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
1600 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1601 {
1602   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1603   PetscInt      lda;
1604 
1605   PetscFunctionBegin;
1606   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1607   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1608   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1609   a->vecinuse = col + 1;
1610   PetscCall(MatDenseGetLDA(a->A, &lda));
1611   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1612   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1613   *v = a->cvec;
1614   PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616 
1617 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1618 {
1619   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1620 
1621   PetscFunctionBegin;
1622   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecWrite() first");
1623   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1624   VecCheckAssembled(a->cvec);
1625   a->vecinuse = 0;
1626   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1627   PetscCall(VecResetArray(a->cvec));
1628   if (v) *v = NULL;
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
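/*
  Returns a cached parallel view (a->cmat) of the global range [rbegin,rend) x [cbegin,cend):
  the requested ranges are clipped against this rank's ownership to obtain the local sizes,
  the cached layouts are rebuilt only when the requested ranges change, and the local block is
  taken from the inner sequential matrix with MatDenseGetSubMatrix().
*/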
1632 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1633 {
1634   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1635   Mat_MPIDense *c;
1636   MPI_Comm      comm;
1637   PetscInt      prbegin, prend, pcbegin, pcend;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1641   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1642   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1643   prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1644   prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1645   pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
1646   pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
1647   if (!a->cmat) {
1648     PetscCall(MatCreate(comm, &a->cmat));
1649     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1650     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1651     else {
1652       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1653       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1654       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1655     }
1656     if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
1657     else {
1658       PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1659       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1660       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1661     }
1662     c             = (Mat_MPIDense *)a->cmat->data;
1663     c->sub_rbegin = rbegin;
1664     c->sub_rend   = rend;
1665     c->sub_cbegin = cbegin;
1666     c->sub_cend   = cend;
1667   }
1668   c = (Mat_MPIDense *)a->cmat->data;
1669   if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
1670     PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1671     PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1672     PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1673     PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1674     PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1675     c->sub_rbegin = rbegin;
1676     c->sub_rend   = rend;
1677   }
1678   if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
1679     // special case: if the requested width is unchanged and rank 0 owns all columns, the existing column layout (and communication objects) can be reused
1680     if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
1681       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1682       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1683       PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1684       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1685       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1686       PetscCall(VecDestroy(&c->lvec));
1687       PetscCall(PetscSFDestroy(&c->Mvctx));
1688     }
1689     c->sub_cbegin = cbegin;
1690     c->sub_cend   = cend;
1691   }
1692   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1693   PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));
1694 
1695   a->cmat->preallocated = PETSC_TRUE;
1696   a->cmat->assembled    = PETSC_TRUE;
1697 #if defined(PETSC_HAVE_DEVICE)
1698   a->cmat->offloadmask = c->A->offloadmask;
1699 #endif
1700   a->matinuse = cbegin + 1;
1701   *v          = a->cmat;
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1706 {
1707   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1708   Mat_MPIDense *c;
1709 
1710   PetscFunctionBegin;
1711   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1712   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1713   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1714   a->matinuse = 0;
1715   c           = (Mat_MPIDense *)a->cmat->data;
1716   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1717   if (v) *v = NULL;
1718 #if defined(PETSC_HAVE_DEVICE)
1719   A->offloadmask = a->A->offloadmask;
1720 #endif
1721   PetscFunctionReturn(PETSC_SUCCESS);
1722 }
1723 
1724 /*MC
1725    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1726 
1727    Options Database Key:
1728 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1729 
1730   Level: beginner
1731 
1732 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1733 M*/
1734 PetscErrorCode MatCreate_MPIDense(Mat mat)
1735 {
1736   Mat_MPIDense *a;
1737 
1738   PetscFunctionBegin;
1739   PetscCall(PetscNew(&a));
1740   mat->data   = (void *)a;
1741   mat->ops[0] = MatOps_Values;
1742 
1743   mat->insertmode = NOT_SET_VALUES;
1744 
1745   /* build cache for off-process entries formed during assembly */
1746   a->donotstash = PETSC_FALSE;
1747 
1748   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1749 
1750   /* data structures used for matrix-vector multiply */
1751   a->lvec        = NULL;
1752   a->Mvctx       = NULL;
1753   a->roworiented = PETSC_TRUE;
1754 
1755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1757   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1759   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1760   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1764   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1765   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1769   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1770   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1774   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1775   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1776 #if defined(PETSC_HAVE_ELEMENTAL)
1777   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1778 #endif
1779 #if defined(PETSC_HAVE_SCALAPACK)
1780   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1781 #endif
1782 #if defined(PETSC_HAVE_CUDA)
1783   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1784 #endif
1785   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1786   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1787   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1788 #if defined(PETSC_HAVE_CUDA)
1789   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1790   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1791 #endif
1792 #if defined(PETSC_HAVE_HIP)
1793   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1794   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1795   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1796 #endif
1797   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1798   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1799   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
1800   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1801   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1802   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1803   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 /*MC
1808    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1809 
1810    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1811    and `MATMPIDENSE` otherwise.
1812 
1813    Options Database Key:
1814 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1815 
1816   Level: beginner
1817 
1818 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1819 M*/
1820 
1821 /*@
1822   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1823 
1824   Collective
1825 
1826   Input Parameters:
1827 + B    - the matrix
1828 - data - optional location of matrix data.  Set to `NULL` for PETSc
1829          to control all matrix memory allocation.
1830 
1831   Level: intermediate
1832 
1833   Notes:
1834   The dense format is fully compatible with standard Fortran
1835   storage by columns.
1836 
1837   The data input variable is intended primarily for Fortran programmers
1838   who wish to allocate their own matrix memory space.  Most users should
1839   set `data` to `NULL`.
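
  Example Usage:
  A minimal sketch with PETSc-managed storage; the communicator `comm` and the
  global size 100 x 100 are illustrative.
.vb
  Mat A;

  MatCreate(comm, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
  MatSetType(A, MATMPIDENSE);
  MatMPIDenseSetPreallocation(A, NULL);
.ve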
1840 
1841 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1842 @*/
1843 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1844 {
1845   PetscFunctionBegin;
1846   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1847   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1848   PetscFunctionReturn(PETSC_SUCCESS);
1849 }
1850 
1851 /*@
1852   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1853   array provided by the user. This is useful to avoid copying an array
1854   into a matrix.
1855 
1856   Not Collective
1857 
1858   Input Parameters:
1859 + mat   - the matrix
1860 - array - the array in column major order
1861 
1862   Level: developer
1863 
1864   Note:
1865   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1866 
1867   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1868   freed when the matrix is destroyed.
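
  Example Usage:
  A minimal sketch, assuming `array` is a user buffer in column-major order that is large
  enough for the local block and stays allocated while it is placed.
.vb
  MatDensePlaceArray(mat, array); /* mat now wraps the user buffer  */
  /* ... work with mat ... */
  MatDenseResetArray(mat);        /* return to the original storage */
.ve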
1869 
1870 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1871           `MatDenseReplaceArray()`
1872 @*/
1873 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1874 {
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1877   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1878   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1879 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1880   mat->offloadmask = PETSC_OFFLOAD_CPU;
1881 #endif
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 /*@
1886   MatDenseResetArray - Resets the matrix array to the array it had before the call to `MatDensePlaceArray()`
1887 
1888   Not Collective
1889 
1890   Input Parameter:
1891 . mat - the matrix
1892 
1893   Level: developer
1894 
1895   Note:
1896   You can only call this after a call to `MatDensePlaceArray()`
1897 
1898 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1899 @*/
1900 PetscErrorCode MatDenseResetArray(Mat mat)
1901 {
1902   PetscFunctionBegin;
1903   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1904   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1905   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1906   PetscFunctionReturn(PETSC_SUCCESS);
1907 }
1908 
1909 /*@
1910   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1911   array provided by the user. This is useful to avoid copying an array
1912   into a matrix.
1913 
1914   Not Collective
1915 
1916   Input Parameters:
1917 + mat   - the matrix
1918 - array - the array in column major order
1919 
1920   Level: developer
1921 
1922   Note:
1923   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1924 
1925   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1926   freed by the user. It will be freed when the matrix is destroyed.
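
  Example Usage:
  A minimal sketch; `nloc` (the number of entries in the local block) is illustrative, and
  the buffer must come from `PetscMalloc()` because the matrix takes ownership of it.
.vb
  PetscScalar *array;

  PetscMalloc1(nloc, &array);
  /* ... fill array in column-major order ... */
  MatDenseReplaceArray(mat, array); /* mat now owns array and will free it */
.ve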
1927 
1928 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1929 @*/
1930 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1931 {
1932   PetscFunctionBegin;
1933   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1934   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1935   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1936 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1937   mat->offloadmask = PETSC_OFFLOAD_CPU;
1938 #endif
1939   PetscFunctionReturn(PETSC_SUCCESS);
1940 }
1941 
1942 /*@
1943   MatCreateDense - Creates a matrix in `MATDENSE` format.
1944 
1945   Collective
1946 
1947   Input Parameters:
1948 + comm - MPI communicator
1949 . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1950 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1951 . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1952 . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1953 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1954    to control all matrix memory allocation.
1955 
1956   Output Parameter:
1957 . A - the matrix
1958 
1959   Level: intermediate
1960 
1961   Notes:
1962   The dense format is fully compatible with standard Fortran
1963   storage by columns.
1964 
1965   Although local portions of the matrix are stored in column-major
1966   order, the matrix is partitioned across MPI ranks by row.
1967 
1968   The data input variable is intended primarily for Fortran programmers
1969   who wish to allocate their own matrix memory space.  Most users should
1970   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1971 
1972   The user MUST specify either the local or global matrix dimensions
1973   (possibly both).
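
  Example Usage:
  A minimal sketch creating a 100 x 50 matrix (the sizes are illustrative) with
  PETSc-managed storage.
.vb
  Mat A;

  MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A);
  /* ... insert entries with MatSetValues(), then assemble ... */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  MatDestroy(&A);
.ve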
1974 
1975 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1976 @*/
1977 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
1978 {
1979   PetscFunctionBegin;
1980   PetscCall(MatCreate(comm, A));
1981   PetscCall(MatSetSizes(*A, m, n, M, N));
1982   PetscCall(MatSetType(*A, MATDENSE));
1983   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1984   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1985   PetscFunctionReturn(PETSC_SUCCESS);
1986 }
1987 
1988 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1989 {
1990   Mat           mat;
1991   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1992 
1993   PetscFunctionBegin;
1994   *newmat = NULL;
1995   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1996   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1997   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1998   a = (Mat_MPIDense *)mat->data;
1999 
2000   mat->factortype   = A->factortype;
2001   mat->assembled    = PETSC_TRUE;
2002   mat->preallocated = PETSC_TRUE;
2003 
2004   mat->insertmode = NOT_SET_VALUES;
2005   a->donotstash   = oldmat->donotstash;
2006 
2007   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2008   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2009 
2010   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2011 
2012   *newmat = mat;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2017 {
2018   PetscBool isbinary;
2019 #if defined(PETSC_HAVE_HDF5)
2020   PetscBool ishdf5;
2021 #endif
2022 
2023   PetscFunctionBegin;
2024   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2025   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2026   /* force binary viewer to load .info file if it has not yet done so */
2027   PetscCall(PetscViewerSetUp(viewer));
2028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2029 #if defined(PETSC_HAVE_HDF5)
2030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2031 #endif
2032   if (isbinary) {
2033     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2034 #if defined(PETSC_HAVE_HDF5)
2035   } else if (ishdf5) {
2036     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2037 #endif
2038   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2039   PetscFunctionReturn(PETSC_SUCCESS);
2040 }
2041 
2042 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2043 {
2044   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2045   Mat           a, b;
2046 
2047   PetscFunctionBegin;
2048   a = matA->A;
2049   b = matB->A;
2050   PetscCall(MatEqual(a, b, flag));
2051   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2056 {
2057   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2058 
2059   PetscFunctionBegin;
2060   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2061   PetscCall(MatDestroy(&atb->atb));
2062   PetscCall(PetscFree(atb));
2063   PetscFunctionReturn(PETSC_SUCCESS);
2064 }
2065 
2066 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2067 {
2068   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2069 
2070   PetscFunctionBegin;
2071   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2072   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2073   PetscCall(PetscFree(abt));
2074   PetscFunctionReturn(PETSC_SUCCESS);
2075 }
2076 
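/*
  Computes C = A^T * B. Each rank first forms the full N_A x N_B partial product
  aseq^T * bseq from its local rows; the partial products are then summed across ranks,
  either with a PetscSF reduction onto rank 0 (when rank 0 owns all rows of C) or with
  MPI_Reduce_scatter(), which sums and simultaneously delivers each rank's row block of C.
*/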
2077 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2078 {
2079   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2080   Mat_TransMatMultDense *atb;
2081   MPI_Comm               comm;
2082   PetscMPIInt            size, *recvcounts;
2083   PetscScalar           *carray, *sendbuf;
2084   const PetscScalar     *atbarray;
2085   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2086   const PetscInt        *ranges;
2087 
2088   PetscFunctionBegin;
2089   MatCheckProduct(C, 3);
2090   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2091   atb        = (Mat_TransMatMultDense *)C->product->data;
2092   recvcounts = atb->recvcounts;
2093   sendbuf    = atb->sendbuf;
2094 
2095   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2096   PetscCallMPI(MPI_Comm_size(comm, &size));
2097 
2098   /* compute atbarray = aseq^T * bseq */
2099   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));
2100 
2101   PetscCall(MatGetOwnershipRanges(C, &ranges));
2102 
2103   if (ranges[1] == C->rmap->N) {
2104     /* all of the values are being reduced to rank 0: optimize this case with a PetscSF reduction, which can use MPI_Reduce and GPU-aware MPI when available */
2105     PetscInt           atb_lda, c_lda;
2106     Mat                atb_local = atb->atb;
2107     Mat                atb_alloc = NULL;
2108     Mat                c_local   = c->A;
2109     Mat                c_alloc   = NULL;
2110     PetscMemType       atb_memtype, c_memtype;
2111     const PetscScalar *atb_array = NULL;
2112     MPI_Datatype       vector_type;
2113     PetscScalar       *c_array = NULL;
2114     PetscMPIInt        rank;
2115 
2116     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2117 
2118     PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
2119     if (atb_lda != C->rmap->N) {
2120       // copy atb to a matrix that will have lda == the number of rows
2121       PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
2122       PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
2123       atb_local = atb_alloc;
2124     }
2125 
2126     if (rank == 0) {
2127       PetscCall(MatDenseGetLDA(c_local, &c_lda));
2128       if (c_lda != C->rmap->N) {
2129         // copy c to a matrix that will have lda == the number of rows
2130         PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
2131         c_local = c_alloc;
2132       }
2133       PetscCall(MatZeroEntries(c_local));
2134     }
2135     /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
2136      * B->cmap->N: reduce through a->Mvctx so the PetscSF picks the best reduction method */
2137     if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
2138     vector_type = MPIU_SCALAR;
2139     if (B->cmap->N > 1) {
2140       PetscMPIInt mpi_N;
2141 
2142       PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
2143       PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
2144       PetscCallMPI(MPI_Type_commit(&vector_type));
2145     }
2146     PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
2147     PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
2148     PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
2149     PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
2150     PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
2151     PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
2152     if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
2153     if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
2154     PetscCall(MatDestroy(&atb_alloc));
2155     PetscCall(MatDestroy(&c_alloc));
2156     PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2157     PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2158     PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2159     PetscFunctionReturn(PETSC_SUCCESS);
2160   }
2161 
2162   /* arrange atbarray into sendbuf */
2163   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2164   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2165   for (proc = 0, k = 0; proc < size; proc++) {
2166     for (j = 0; j < cN; j++) {
2167       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2168     }
2169   }
2170   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2171 
2172   /* sum all atbarray to local values of C */
2173   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2174   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2175   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2176   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2177   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2178   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2179   PetscFunctionReturn(PETSC_SUCCESS);
2180 }
2181 
2182 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2183 {
2184   MPI_Comm               comm;
2185   PetscMPIInt            size;
2186   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2187   Mat_TransMatMultDense *atb;
2188   PetscBool              cisdense = PETSC_FALSE;
2189   const PetscInt        *ranges;
2190 
2191   PetscFunctionBegin;
2192   MatCheckProduct(C, 4);
2193   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2194   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2195   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", A->rmap->rstart,
2196              A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2197 
2198   /* create matrix product C */
2199   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2200 #if defined(PETSC_HAVE_CUDA)
2201   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2202 #endif
2203 #if defined(PETSC_HAVE_HIP)
2204   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2205 #endif
2206   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2207   PetscCall(MatSetUp(C));
2208 
2209   /* create data structure for reuse C */
2210   PetscCallMPI(MPI_Comm_size(comm, &size));
2211   PetscCall(PetscNew(&atb));
2212   cM = C->rmap->N;
2213   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2214   PetscCall(MatGetOwnershipRanges(C, &ranges));
2215   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2216   C->product->data    = atb;
2217   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2218   PetscFunctionReturn(PETSC_SUCCESS);
2219 }
2220 
2221 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2222 {
2223   MPI_Comm               comm;
2224   PetscMPIInt            i, size;
2225   PetscInt               maxRows, bufsiz;
2226   PetscMPIInt            tag;
2227   PetscInt               alg;
2228   Mat_MatTransMultDense *abt;
2229   Mat_Product           *product = C->product;
2230   PetscBool              flg;
2231 
2232   PetscFunctionBegin;
2233   MatCheckProduct(C, 4);
2234   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2235   /* check local size of A and B */
2236   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2237 
2238   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2239   alg = flg ? 0 : 1;
2240 
2241   /* setup matrix product C */
2242   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2243   PetscCall(MatSetType(C, MATMPIDENSE));
2244   PetscCall(MatSetUp(C));
2245   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2246 
2247   /* create data structure for reuse C */
2248   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2249   PetscCallMPI(MPI_Comm_size(comm, &size));
2250   PetscCall(PetscNew(&abt));
2251   abt->tag = tag;
2252   abt->alg = alg;
2253   switch (alg) {
2254   case 1: /* alg: "cyclic" */
2255     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2256     bufsiz = A->cmap->N * maxRows;
2257     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2258     break;
2259   default: /* alg: "allgatherv" */
2260     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2261     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2262     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2263     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2264     break;
2265   }
2266 
2267   C->product->data                = abt;
2268   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2269   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2270   PetscFunctionReturn(PETSC_SUCCESS);
2271 }
2272 
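/*
  Ring (cyclic) algorithm for C = A * B^T: the local rows of B circulate around the ranks
  with nonblocking sends and receives; at every step each rank multiplies its local rows of A
  against the chunk it currently holds, filling the column block of C that corresponds to the
  chunk's origin, while the next chunk is already in flight.
*/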
2273 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2274 {
2275   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2276   Mat_MatTransMultDense *abt;
2277   MPI_Comm               comm;
2278   PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
2279   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2280   PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2281   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2282   const PetscScalar     *av, *bv;
2283   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2284   MPI_Request            reqs[2];
2285   const PetscInt        *ranges;
2286 
2287   PetscFunctionBegin;
2288   MatCheckProduct(C, 3);
2289   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2290   abt = (Mat_MatTransMultDense *)C->product->data;
2291   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2292   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2293   PetscCallMPI(MPI_Comm_size(comm, &size));
2294   PetscCall(MatDenseGetArrayRead(a->A, &av));
2295   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2296   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2297   PetscCall(MatDenseGetLDA(a->A, &i));
2298   PetscCall(PetscBLASIntCast(i, &alda));
2299   PetscCall(MatDenseGetLDA(b->A, &i));
2300   PetscCall(PetscBLASIntCast(i, &blda));
2301   PetscCall(MatDenseGetLDA(c->A, &i));
2302   PetscCall(PetscBLASIntCast(i, &clda));
2303   PetscCall(MatGetOwnershipRanges(B, &ranges));
2304   bn = B->rmap->n;
2305   if (blda == bn) {
2306     sendbuf = (PetscScalar *)bv;
2307   } else {
2308     sendbuf = abt->buf[0];
2309     for (k = 0, i = 0; i < cK; i++) {
2310       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2311     }
2312   }
2313   if (size > 1) {
2314     sendto   = (rank + size - 1) % size;
2315     recvfrom = (rank + size + 1) % size;
2316   } else {
2317     sendto = recvfrom = 0;
2318   }
2319   PetscCall(PetscBLASIntCast(cK, &ck));
2320   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2321   recvisfrom = rank;
2322   for (i = 0; i < size; i++) {
2323     /* receiving and sending have finished, so the buffers can be read or reused */
2324     PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf will originate */
2325     PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2326 
2327     if (nextrecvisfrom != rank) {
2328       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2329       sendsiz = cK * bn;
2330       recvsiz = cK * nextbn;
2331       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2332       PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2333       PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2334     }
2335 
2336     /* local aseq * sendbuf^T */
2337     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2338     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2339 
2340     if (nextrecvisfrom != rank) {
2341       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2342       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2343     }
2344     bn         = nextbn;
2345     recvisfrom = nextrecvisfrom;
2346     sendbuf    = recvbuf;
2347   }
2348   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2349   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2350   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2351   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2352   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2353   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2354   PetscFunctionReturn(PETSC_SUCCESS);
2355 }
2356 
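/*
  Allgatherv algorithm for C = A * B^T: the transpose of every rank's local rows of B is
  gathered onto all ranks with MPI_Allgatherv(), after which a single local GEMM produces
  this rank's rows of C.
*/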
2357 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2358 {
2359   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2360   Mat_MatTransMultDense *abt;
2361   MPI_Comm               comm;
2362   PetscMPIInt            size, ibn;
2363   PetscScalar           *cv, *sendbuf, *recvbuf;
2364   const PetscScalar     *av, *bv;
2365   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2366   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2367   PetscBLASInt           cm, cn, ck, alda, clda;
2368 
2369   PetscFunctionBegin;
2370   MatCheckProduct(C, 3);
2371   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2372   abt = (Mat_MatTransMultDense *)C->product->data;
2373   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2374   PetscCallMPI(MPI_Comm_size(comm, &size));
2375   PetscCall(MatDenseGetArrayRead(a->A, &av));
2376   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2377   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2378   PetscCall(MatDenseGetLDA(a->A, &i));
2379   PetscCall(PetscBLASIntCast(i, &alda));
2380   PetscCall(MatDenseGetLDA(b->A, &blda));
2381   PetscCall(MatDenseGetLDA(c->A, &i));
2382   PetscCall(PetscBLASIntCast(i, &clda));
2383   /* copy transpose of B into buf[0] */
2384   bn      = B->rmap->n;
2385   sendbuf = abt->buf[0];
2386   recvbuf = abt->buf[1];
2387   for (k = 0, j = 0; j < bn; j++) {
2388     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2389   }
2390   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2391   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2392   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2393   PetscCall(PetscBLASIntCast(cK, &ck));
2394   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2395   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2396   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2397   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

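/*
  The allgatherv variant above packs the local rows of B in transposed order so
  that, after MPI_Allgatherv(), every rank holds all of B^T contiguously (column
  major, leading dimension cK) and one GEMM yields the local block row of
  C = A * B^T. A minimal standalone sketch of the packing step (an illustrative
  helper, not used by this file):

    static void PackBTranspose(PetscInt bn, PetscInt cK, PetscInt blda, const PetscScalar *bv, PetscScalar *sendbuf)
    {
      PetscInt i, j, k = 0;

      for (j = 0; j < bn; j++) {                                      // each local row of B ...
        for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];  // ... becomes a column of B^T
      }
    }
*/
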
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

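/*
  A sketch of selecting between the two ABt kernels dispatched above through the
  MatProduct API (standard PETSc calls; A and B are assumed to be assembled
  MATMPIDENSE matrices with the same number of columns):

    Mat D;
    PetscCall(MatProductCreate(A, B, NULL, &D));
    PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
    PetscCall(MatProductSetAlgorithm(D, "cyclic")); // or "allgatherv" (the default)
    PetscCall(MatProductSetFromOptions(D));
    PetscCall(MatProductSymbolic(D));
    PetscCall(MatProductNumeric(D));
*/
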
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense     *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL;
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL;
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

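/*
  In the broadcast branch above, a single MPI datatype spanning B->cmap->N scalars
  lets one PetscSF broadcast replicate every column at once, instead of the
  column-by-column loop taken in the general branch. A minimal sketch of that
  datatype construction, mirroring the code above (N stands for the global column
  count):

    MPI_Datatype vec_type = MPIU_SCALAR;
    PetscMPIInt  mpi_N;

    PetscCall(PetscMPIIntCast(N, &mpi_N));
    PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vec_type));
    PetscCallMPI(MPI_Type_commit(&vec_type));
    // ... use vec_type as the unit in PetscSFBcastWithMemTypeBegin()/PetscSFBcastEnd() ...
    PetscCallMPI(MPI_Type_free(&vec_type));
*/
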
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product      *product = C->product;
  PetscInt          alg;
  Mat_MatMultDense *ab;
  PetscBool         flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ? 0 : 1;

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

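/*
  An end-to-end sketch of the AB product set up by the symbolic routine above
  (sizes illustrative; the "petsc" algorithm replicates B on every rank through
  the Be work matrix and multiplies locally):

    Mat A, B, C;
    PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
    PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 50, 20, NULL, &B));
    // ... set entries, MatAssemblyBegin()/MatAssemblyEnd() on both ...
    PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // symbolic + numeric
    PetscCall(MatMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));   // reuses the product data
*/
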
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is PETSc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

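/*
  Runtime selection of the AB algorithm configured above, e.g.

    ./app -mat_product_algorithm petsc       (the default)
    ./app -mat_product_algorithm elemental   (requires PETSc configured with Elemental)
*/
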
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

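/*
  The AtB path is reached at the user level through, e.g. (a sketch; A and B are
  assumed to be assembled MATMPIDENSE matrices with identical row layouts):

    Mat C;
    PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // C = A^T * B
*/
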
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

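/*
  The ABt algorithm accepts two option spellings, depending on how the product was
  created:

    -matmattransmult_mpidense_mpidense_via cyclic   (via MatMatTransposeMult())
    -mat_product_algorithm cyclic                   (via the MatProduct API)
*/
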
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

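/*
  A minimal driver exercising the dispatch above (a sketch: entries are left at
  zero and error handling is trimmed for brevity):

    #include <petscmat.h>

    int main(int argc, char **argv)
    {
      Mat A, B, C;

      PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
      PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 60, 30, NULL, &A));
      PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 60, 30, NULL, &B));
      PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
      PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // AtB path
      PetscCall(MatDestroy(&C));
      PetscCall(MatDestroy(&B));
      PetscCall(MatDestroy(&A));
      PetscCall(PetscFinalize());
      return 0;
    }
*/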