xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 67af85e8c500e44360931f69e75318823dba22e0)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/*
   Return the m x m diagonal block owned by this process as a sequential dense
   matrix. The block is created lazily, cached on A under the key
   "DiagonalBlock", and shares storage with A's local array (no copy).
   The caller does NOT own a reference to *a.
*/
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* point B at column rstart of the local array.
       NOTE(review): the offset m*rstart assumes the inner matrix has lda == m;
       if the lda was changed (MatDenseSetLDA/placed array) this looks wrong — confirm */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* compose takes a reference, so we can drop ours; the cached B lives as long as A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/*
   Insert/add an m x n logically dense block of values. Locally owned rows go
   straight into the inner sequential matrix; off-process rows are stashed for
   communication during assembly (unless stashing is disabled). Negative row or
   column indices are silently skipped. v is read row-major when roworiented,
   column-major (leading dimension m) otherwise.
*/
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major: the i-th row of v is contiguous */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-major: entry (i,j) sits at v[i + j*m], so insert one value at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: queue it in the stash; assembly will ship it later */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/*
   Set the leading dimension of the local storage. If the inner sequential
   matrix does not exist yet it is created here first (local rows x ALL global
   columns), choosing a CUDA/HIP sequential type when A itself is the matching
   device type.
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* each process stores its row slice against every global column */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/*
   Extract the submatrix A(isrow, iscol) into *B. The column IS is gathered so
   every process holds all selected global columns; rows must be locally owned
   (no redistribution — see the comment below). With MAT_REUSE_MATRIX, *B is
   reused without size validation (see inline comment).
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is filled assuming the target's storage is contiguous
     (lda == nrows); true for freshly preallocated matrices, but a reused B with
     a different lda would be written incorrectly — confirm */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of selected source column */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/*
   Finish assembly: drain incoming stash messages, insert the received values
   row-by-row via MatSetValues_MPIDense() (which now only sees local rows),
   then assemble the inner sequential matrix.
*/
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* assemble the local block regardless of stashing */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/*
   Zero the listed (global) rows, optionally putting diag on the diagonal and
   fixing up b so that b = diag*x on those rows. rows may contain entries owned
   by other processes; each process only acts on its own.
*/
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block with no diagonal; the diagonal is set below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* insert diag on the whole diagonal (zeroed rows were just cleared, other
       diagonal entries are overwritten with diag as well by MatDiagonalSet) */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
/*
   yy = mat*xx. Broadcasts the distributed xx into the local work vector lvec
   via the star-forest Mvctx, then multiplies with the local block (which holds
   all global columns).
*/
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
484 
/*
   zz = yy + mat*xx. Same gather-then-local-multiply scheme as
   MatMult_MPIDense(), delegating the add to the local block's multadd.
*/
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503 
/*
   yy = A^T * xx. The local transpose product lands in lvec (one partial sum
   per global row of A^T); these partial sums are then reduced into the
   distributed yy with the star forest. yy must be zeroed first because the
   reduction adds into it.
*/
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
523 
/*
   zz = yy + A^T * xx. Like MatMultTranspose_MPIDense() but seeds zz with yy
   before reducing the local partial sums into it.
*/
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
543 
544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
545 {
546   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
547   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
548   PetscScalar       *x;
549   const PetscScalar *av;
550 
551   PetscFunctionBegin;
552   PetscCall(VecGetArray(v, &x));
553   PetscCall(VecGetSize(v, &ng));
554   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
555   PetscCall(VecGetLocalSize(v, &nl));
556   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
557   radd = A->rmap->rstart * m;
558   PetscCall(MatDenseGetArrayRead(a->A, &av));
559   PetscCall(MatDenseGetLDA(a->A, &lda));
560   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
561   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
562   PetscCall(PetscArrayzero(x + i, nl - i));
563   PetscCall(VecRestoreArray(v, &x));
564   PetscFunctionReturn(PETSC_SUCCESS);
565 }
566 
/*
   Destroy an MPI dense matrix: free the stash, the inner sequential matrix,
   the multiply work vector and star forest, the cached column vector/submatrix,
   then detach every composed method and the cached diagonal block.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a column vec / submatrix is checked out would leave dangling views */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all composed type-specific methods (mirrors the composes done at creation) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
661 
662 #include <petscdraw.h>
663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
664 {
665   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
666   PetscMPIInt       rank;
667   PetscViewerType   vtype;
668   PetscBool         iascii, isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
674   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
675   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
676   if (iascii) {
677     PetscCall(PetscViewerGetType(viewer, &vtype));
678     PetscCall(PetscViewerGetFormat(viewer, &format));
679     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
680       MatInfo info;
681       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
682       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
683       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
684                                                    (PetscInt)info.memory));
685       PetscCall(PetscViewerFlush(viewer));
686       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
687       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
688       PetscFunctionReturn(PETSC_SUCCESS);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(PETSC_SUCCESS);
691     }
692   } else if (isdraw) {
693     PetscDraw draw;
694     PetscBool isnull;
695 
696     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
697     PetscCall(PetscDrawIsNull(draw, &isnull));
698     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
699   }
700 
701   {
702     /* assemble the entire matrix onto first processor. */
703     Mat          A;
704     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
705     PetscInt    *cols;
706     PetscScalar *vals;
707 
708     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
709     if (rank == 0) {
710       PetscCall(MatSetSizes(A, M, N, M, N));
711     } else {
712       PetscCall(MatSetSizes(A, 0, 0, M, N));
713     }
714     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
715     PetscCall(MatSetType(A, MATMPIDENSE));
716     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721 
722     row = mat->rmap->rstart;
723     m   = mdn->A->rmap->n;
724     for (i = 0; i < m; i++) {
725       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
726       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
727       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
728       row++;
729     }
730 
731     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
732     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
733     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
734     if (rank == 0) {
735       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
736       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
737     }
738     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
739     PetscCall(MatDestroy(&A));
740   }
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
745 {
746   PetscBool iascii, isbinary, isdraw, issocket;
747 
748   PetscFunctionBegin;
749   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
750   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
751   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
752   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
753 
754   if (iascii || issocket || isdraw) {
755     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
756   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
761 {
762   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
763   Mat            mdn = mat->A;
764   PetscLogDouble isend[5], irecv[5];
765 
766   PetscFunctionBegin;
767   info->block_size = 1.0;
768 
769   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
770 
771   isend[0] = info->nz_used;
772   isend[1] = info->nz_allocated;
773   isend[2] = info->nz_unneeded;
774   isend[3] = info->memory;
775   isend[4] = info->mallocs;
776   if (flag == MAT_LOCAL) {
777     info->nz_used      = isend[0];
778     info->nz_allocated = isend[1];
779     info->nz_unneeded  = isend[2];
780     info->memory       = isend[3];
781     info->mallocs      = isend[4];
782   } else if (flag == MAT_GLOBAL_MAX) {
783     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
784 
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
/* MatSetOption_MPIDense - Sets an option on a MATMPIDENSE matrix. Some options are
   forwarded to the local sequential block, some are recorded on the parallel matrix,
   and options with no meaning for dense storage are ignored with a log message. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded straight to the local block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally for MatSetValues interpretation, and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for dense storage: every entry is stored */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* controls whether MatSetValues stashes off-process entries */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRY:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
848 
/* MatDiagonalScale_MPIDense - Computes A = diag(ll) * A * diag(rr). Either vector
   may be NULL to skip that side. The left vector matches the row layout and scales
   local rows directly; the right vector's values are first gathered into the local
   work vector via the matrix's PetscSF so every rank sees all N column scalings. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      /* walk row i across all columns of the column-major local array: stride lda */
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the scatter context used to broadcast rr to every rank */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      /* column i is contiguous in the column-major local array */
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
897 
/* MatNorm_MPIDense - Computes the Frobenius, 1-, or infinity-norm of a parallel
   dense matrix. On one rank the computation is delegated to the sequential block;
   otherwise local contributions are combined with an allreduce. The 2-norm is not
   supported in parallel.

   NOTE(review): the Frobenius and 1-norm loops traverse cmap->n * rmap->n
   consecutive entries of the raw array, which assumes the local leading dimension
   equals the local row count (no padding) — TODO confirm for strided layouts. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the local block is the whole matrix */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared magnitudes locally, then global sum and square root */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* 1-norm: max over columns of the global column absolute sums */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so the local max row sum reduces with MPI_MAX */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
947 
/* MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix by
   setting each local column of A as a global row of the result and letting
   matrix assembly route the entries to the owning ranks. Supports initial,
   reuse, and in-place modes; in-place replaces A's header with the new matrix. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* result has A's local sizes swapped and the same type as A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of this rank's local rows */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* local column j of A (contiguous, length m) becomes global row j of B */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: fold B's contents into A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
986 
987 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
988 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
989 
990 static PetscErrorCode MatSetUp_MPIDense(Mat A)
991 {
992   PetscFunctionBegin;
993   PetscCall(PetscLayoutSetUp(A->rmap));
994   PetscCall(PetscLayoutSetUp(A->cmap));
995   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
996   PetscFunctionReturn(PETSC_SUCCESS);
997 }
998 
999 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1000 {
1001   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1002 
1003   PetscFunctionBegin;
1004   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1011 
1012   PetscFunctionBegin;
1013   PetscCall(MatConjugate(a->A));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
1017 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1018 {
1019   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1020 
1021   PetscFunctionBegin;
1022   PetscCall(MatRealPart(a->A));
1023   PetscFunctionReturn(PETSC_SUCCESS);
1024 }
1025 
1026 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(MatImaginaryPart(a->A));
1032   PetscFunctionReturn(PETSC_SUCCESS);
1033 }
1034 
1035 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1036 {
1037   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038 
1039   PetscFunctionBegin;
1040   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1041   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1042   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1047 
/* MatGetColumnReductions_MPIDense - Computes a per-column reduction (norm, sum, or
   mean of real/imaginary parts) over the global matrix. `type` carries either a
   NormType or a MatReductionType value (their numeric ranges are kept disjoint by
   design elsewhere in PETSc). Local per-column values are combined with MPI_MAX for
   the infinity norm and MPI_SUM otherwise, then post-processed (square root for the
   2-norm, division by the global row count for means). */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* work holds one partial value per global column */
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* a mean is a sum divided by m; compute the sum locally first */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square the local 2-norms so they sum correctly across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide the global sum by the global number of rows to get the mean */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1080 
1081 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1082 {
1083   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1084 
1085   PetscFunctionBegin;
1086   PetscCall(MatSetRandom(d->A, rctx));
1087 #if defined(PETSC_HAVE_DEVICE)
1088   x->offloadmask = d->A->offloadmask;
1089 #endif
1090   PetscFunctionReturn(PETSC_SUCCESS);
1091 }
1092 
/* MatMissingDiagonal_MPIDense - A dense matrix stores every entry, so a diagonal
   entry can never be structurally missing; `*d` is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1099 
1100 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1101 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1102 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1103 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1104 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1105 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1106 
/* Function dispatch table for MATMPIDENSE. Entries are positional: the numbered
   comments mark the MatOperation slot index; a NULL slot means the operation is
   unsupported (or supplied elsewhere, e.g. by composed functions). Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1259 
/* MatMPIDenseSetPreallocation_MPIDense - Allocates (or wraps user-provided `data`
   as) the local dense storage. The inner sequential matrix holds all N global
   columns for this rank's rows; its type tracks the outer matrix's type so that
   CUDA/HIP variants get matching device-resident local blocks. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block is rmap->n by the FULL global column count */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* keep the parallel matrix's offload state consistent with the local block */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1292 
/* MatConvert_MPIAIJ_MPIDense - Converts a parallel AIJ matrix to parallel dense:
   the local AIJ rows are merged into one sequential matrix, densified, and the
   per-rank dense pieces are concatenated into the parallel result. Supports
   initial, reuse, and in-place conversion modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C = this rank's rows (diag + off-diag) as one SeqAIJ matrix */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  /* stack the per-rank dense blocks into a parallel dense matrix, keeping A's column layout */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's contents with the converted matrix */
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1311 
/* MatConvert_MPIDense_MPIAIJ - Converts a parallel dense matrix to parallel AIJ:
   the local dense block is sparsified and the per-rank pieces are concatenated
   into the parallel result. Supports initial, reuse, and in-place modes. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C aliases A's inner sequential matrix (or A itself when sequential) */
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  /* stack the per-rank AIJ blocks into a parallel AIJ matrix, keeping A's column layout */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1329 
1330 #if defined(PETSC_HAVE_ELEMENTAL)
/* MatConvert_MPIDense_Elemental - Converts a parallel dense matrix to Elemental
   format by inserting each rank's local rows with ADD_VALUES (the PETSc-Elemental
   interface requires ADD_VALUES for off-process entries, so the target is zeroed
   first in reuse mode). Supports initial, reuse, and in-place modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* zero so the ADD_VALUES insertions below produce a copy, not an accumulation */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values are inserted column-by-column below */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global row indices of this rank's rows, and all global column indices */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* contiguous storage can be inserted in one call; otherwise insert column by column */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1373 #endif
1374 
1375 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1376 {
1377   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1378 
1379   PetscFunctionBegin;
1380   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1381   PetscFunctionReturn(PETSC_SUCCESS);
1382 }
1383 
1384 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1385 {
1386   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1390   PetscFunctionReturn(PETSC_SUCCESS);
1391 }
1392 
/* MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds a parallel dense matrix whose
   per-rank local block is the given sequential matrix `inmat`. `n` is the local
   column count of the result (PETSC_DECIDE splits the global column count). In the
   symbolic phase the parallel matrix is created and marked as having no off-process
   entries; the numeric phase copies the local data in. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    /* every rank inserts only its own rows, so disable the stash machinery */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1418 
/* Expose column col of the parallel dense matrix as a vector without copying:
   the cached internal vector a->cvec is pointed directly at the column's storage
   inside the local sequential block.  Must be matched by
   MatDenseRestoreColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); /* created once, then cached */
  a->vecinuse = col + 1; /* nonzero marks the vector as checked out; value records the column */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1435 
1436 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1437 {
1438   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439 
1440   PetscFunctionBegin;
1441   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1442   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1443   a->vecinuse = 0;
1444   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1445   PetscCall(VecResetArray(a->cvec));
1446   if (v) *v = NULL;
1447   PetscFunctionReturn(PETSC_SUCCESS);
1448 }
1449 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): exposes column col of
   the local block through the cached internal vector and read-locks it so callers
   cannot modify the matrix data through it.  Must be matched by
   MatDenseRestoreColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); /* created once, then cached */
  a->vecinuse = col + 1; /* nonzero marks the vector as checked out; value records the column */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the returned vector */
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1467 
1468 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1469 {
1470   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1471 
1472   PetscFunctionBegin;
1473   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1474   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1475   a->vecinuse = 0;
1476   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1477   PetscCall(VecLockReadPop(a->cvec));
1478   PetscCall(VecResetArray(a->cvec));
1479   if (v) *v = NULL;
1480   PetscFunctionReturn(PETSC_SUCCESS);
1481 }
1482 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): exposes column col of
   the local block through the cached internal vector using the write-only array
   access of the local block.  Must be matched by
   MatDenseRestoreColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); /* created once, then cached */
  a->vecinuse = col + 1; /* nonzero marks the vector as checked out; value records the column */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1499 
1500 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1501 {
1502   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1503 
1504   PetscFunctionBegin;
1505   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1506   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1507   a->vecinuse = 0;
1508   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1509   PetscCall(VecResetArray(a->cvec));
1510   if (v) *v = NULL;
1511   PetscFunctionReturn(PETSC_SUCCESS);
1512 }
1513 
/* Expose the submatrix A[rbegin:rend, cbegin:cend] as a matrix that aliases A's
   storage.  The result is a cached internal matrix (a->cmat) reused across calls;
   its layouts are rebuilt only when the requested sizes change.  Must be matched
   by MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* intersection of the requested global row range with this rank's rows, in local indices */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix shell with the requested layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached shell; rebuild a layout only when the requested size changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global row count but not the full row map: local splits may still differ across ranks */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the local block of the submatrix aliases the storage of A's local block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero flags an outstanding submatrix; value records cbegin */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1571 
/* Return the cached submatrix obtained with MatDenseGetSubMatrix_MPIDense():
   releases the aliased local block and clears the in-use flag.  On output *v
   is set to NULL (if provided). */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* restoring the local block may have changed where its data currently lives */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1590 
1591 /*MC
1592    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1593 
1594    Options Database Key:
1595 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1596 
1597   Level: beginner
1598 
1599 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1600 M*/
/* Constructor for the MATMPIDENSE matrix type: allocates the type-specific data
   structure, installs the shared operations table, creates the stash used for
   off-process MatSetValues(), and composes every type-specific method
   (dense-array access, column vectors, submatrices, conversions, products). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* copy the shared function table into this matrix */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array access and column/submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1669 
1670 /*MC
1671    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1672 
1673    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1674    and `MATMPIDENSE` otherwise.
1675 
1676    Options Database Key:
1677 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1678 
1679   Level: beginner
1680 
1681 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1682 M*/
1683 
1684 /*@C
1685   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1686 
1687   Collective
1688 
1689   Input Parameters:
1690 + B    - the matrix
1691 - data - optional location of matrix data.  Set to `NULL` for PETSc
1692    to control all matrix memory allocation.
1693 
1694   Level: intermediate
1695 
1696   Notes:
1697   The dense format is fully compatible with standard Fortran
1698   storage by columns.
1699 
1700   The data input variable is intended primarily for Fortran programmers
1701   who wish to allocate their own matrix memory space.  Most users should
1702   set `data` to `NULL`.
1703 
1704 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1705 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: dispatches to the composed implementation and is a silent
     no-op for matrix types that do not provide "MatMPIDenseSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1713 
1714 /*@
1715   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1716   array provided by the user. This is useful to avoid copying an array
1717   into a matrix
1718 
1719   Not Collective
1720 
1721   Input Parameters:
1722 + mat   - the matrix
1723 - array - the array in column major order
1724 
1725   Level: developer
1726 
1727   Note:
1728   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1729   freed when the matrix is destroyed.
1730 
1731 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1732           `MatDenseReplaceArray()`
1733 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* matrix values have (potentially) changed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1745 
1746 /*@
  MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to `MatDensePlaceArray()`
1748 
1749   Not Collective
1750 
1751   Input Parameter:
1752 . mat - the matrix
1753 
1754   Level: developer
1755 
1756   Note:
1757   You can only call this after a call to `MatDensePlaceArray()`
1758 
1759 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1760 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* matrix values have (potentially) changed */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1769 
1770 /*@
1771   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1772   array provided by the user. This is useful to avoid copying an array
1773   into a matrix
1774 
1775   Not Collective
1776 
1777   Input Parameters:
1778 + mat   - the matrix
1779 - array - the array in column major order
1780 
1781   Level: developer
1782 
1783   Note:
1784   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1785   freed by the user. It will be freed when the matrix is destroyed.
1786 
1787 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1788 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* matrix values have (potentially) changed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1800 
1801 /*@C
1802   MatCreateDense - Creates a matrix in `MATDENSE` format.
1803 
1804   Collective
1805 
1806   Input Parameters:
1807 + comm - MPI communicator
1808 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1809 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1810 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1811 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1812 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1813    to control all matrix memory allocation.
1814 
1815   Output Parameter:
1816 . A - the matrix
1817 
1818   Level: intermediate
1819 
1820   Notes:
1821   The dense format is fully compatible with standard Fortran
1822   storage by columns.
1823 
1824   Although local portions of the matrix are stored in column-major
1825   order, the matrix is partitioned across MPI ranks by row.
1826 
1827   The data input variable is intended primarily for Fortran programmers
1828   who wish to allocate their own matrix memory space.  Most users should
1829   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1830 
1831   The user MUST specify either the local or global matrix dimensions
1832   (possibly both).
1833 
1834 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1835 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE)); /* resolves to MATSEQDENSE or MATMPIDENSE based on comm */
  /* both preallocation routines are called; MatMPIDenseSetPreallocation() uses
     PetscTryMethod and is a no-op on a seq matrix (presumably the seq variant
     behaves likewise on an mpi matrix - see their implementations) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1846 
/* Create a new MATMPIDENSE matrix with the same layouts as A, duplicating the
   local sequential block (values copied or not, per cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); /* must precede accessing mat->data below */
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share A's layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* duplicate the local sequential block */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1874 
/* Load a MATMPIDENSE matrix from a viewer; binary viewers are supported, and
   HDF5 viewers when PETSc is built with HDF5. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1900 
1901 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1902 {
1903   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1904   Mat           a, b;
1905 
1906   PetscFunctionBegin;
1907   a = matA->A;
1908   b = matB->A;
1909   PetscCall(MatEqual(a, b, flag));
1910   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1911   PetscFunctionReturn(PETSC_SUCCESS);
1912 }
1913 
1914 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1915 {
1916   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1917 
1918   PetscFunctionBegin;
1919   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1920   PetscCall(MatDestroy(&atb->atb));
1921   PetscCall(PetscFree(atb));
1922   PetscFunctionReturn(PETSC_SUCCESS);
1923 }
1924 
1925 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1926 {
1927   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1928 
1929   PetscFunctionBegin;
1930   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1931   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1932   PetscCall(PetscFree(abt));
1933   PetscFunctionReturn(PETSC_SUCCESS);
1934 }
1935 
/* Numeric phase of C = A^T * B for MATMPIDENSE operands.

   Each rank forms the full-size sequential product aseq^T * bseq (cached in
   atb->atb), packs it into sendbuf grouped by destination rank, and a single
   MPI_Reduce_scatter both sums the per-rank contributions and delivers to each
   rank its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its rows of all cN columns (column by column) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  /* NOTE(review): the packed order matches c->A's column-major layout only when
     its array is contiguous (lda == local row count) - confirm this invariant */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1981 
/* Symbolic phase of C = A^T * B for MATMPIDENSE operands: checks compatibility,
   sets up C's sizes and type, and allocates the pack buffer and reduce-scatter
   counts reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to own the same rows on every rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  /* NOTE(review): with both CUDA and HIP enabled the second check overwrites the
     result of the first - confirm this is the intended precedence */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN local product; recvcounts[i] = (rows owned by rank i) * cN,
     matching the packing order used in the numeric phase */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2023 
/* Symbolic phase of C = A * B^T for MATMPIDENSE operands: sets up C, chooses the
   communication algorithm ("allgatherv" or "cyclic") from the product options,
   and allocates the exchange buffers reused by the numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); /* private tag for the cyclic send/recv */

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" - double buffer sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" - buf[0] holds the packed local rows of B, buf[1] the gathered global B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    /* displacements use A->cmap->N; equal to B->cmap->N since the local column counts matched above */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2075 
/*
  Computes C = A * B^T for MPI dense A, B using a ring ("cyclic") exchange:
  each rank's local rows of B are passed around the communicator so that every
  rank can multiply its local block of A against every rank's slice of B^T.
  Communication of the next slice is overlapped with the local GEMM on the
  current one via nonblocking send/receive into the two buffers allocated by
  the symbolic phase (abt->buf[0]/buf[1]).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense arrays, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local array is contiguous; send it directly without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack B's local block column-by-column into a contiguous send buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send "left", receive from "right" */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the slice currently held originated on this rank */
  for (i = 0; i < size; i++) {
    /* previous send/recv (if any) have completed, so both bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* ping-pong between the two buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* all entries were computed locally; skip off-process communication in assembly */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2159 
2160 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2161 {
2162   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2163   Mat_MatTransMultDense *abt;
2164   MPI_Comm               comm;
2165   PetscMPIInt            size;
2166   PetscScalar           *cv, *sendbuf, *recvbuf;
2167   const PetscScalar     *av, *bv;
2168   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2169   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2170   PetscBLASInt           cm, cn, ck, alda, clda;
2171 
2172   PetscFunctionBegin;
2173   MatCheckProduct(C, 3);
2174   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2175   abt = (Mat_MatTransMultDense *)C->product->data;
2176   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2177   PetscCallMPI(MPI_Comm_size(comm, &size));
2178   PetscCall(MatDenseGetArrayRead(a->A, &av));
2179   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2180   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2181   PetscCall(MatDenseGetLDA(a->A, &i));
2182   PetscCall(PetscBLASIntCast(i, &alda));
2183   PetscCall(MatDenseGetLDA(b->A, &blda));
2184   PetscCall(MatDenseGetLDA(c->A, &i));
2185   PetscCall(PetscBLASIntCast(i, &clda));
2186   /* copy transpose of B into buf[0] */
2187   bn      = B->rmap->n;
2188   sendbuf = abt->buf[0];
2189   recvbuf = abt->buf[1];
2190   for (k = 0, j = 0; j < bn; j++) {
2191     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2192   }
2193   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2194   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2195   PetscCall(PetscBLASIntCast(cK, &ck));
2196   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2197   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2198   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2199   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2200   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2201   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2202   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2203   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2204   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2205   PetscFunctionReturn(PETSC_SUCCESS);
2206 }
2207 
2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2209 {
2210   Mat_MatTransMultDense *abt;
2211 
2212   PetscFunctionBegin;
2213   MatCheckProduct(C, 3);
2214   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2215   abt = (Mat_MatTransMultDense *)C->product->data;
2216   switch (abt->alg) {
2217   case 1:
2218     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2219     break;
2220   default:
2221     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2222     break;
2223   }
2224   PetscFunctionReturn(PETSC_SUCCESS);
2225 }
2226 
2227 #if defined(PETSC_HAVE_ELEMENTAL)
2228 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2229 {
2230   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2231 
2232   PetscFunctionBegin;
2233   PetscCall(MatDestroy(&ab->Ce));
2234   PetscCall(MatDestroy(&ab->Ae));
2235   PetscCall(MatDestroy(&ab->Be));
2236   PetscCall(PetscFree(ab));
2237   PetscFunctionReturn(PETSC_SUCCESS);
2238 }
2239 
/*
  Numeric phase of C = A * B for MPI dense matrices via Elemental: refresh the
  cached Elemental copies of A and B (created in the symbolic phase), multiply
  them in Elemental, and convert the Elemental result back into C in place.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  /* reuse the Elemental matrices allocated at symbolic time; only values are updated */
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  /* copy the Elemental product back into the MPI dense output C */
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2254 
2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2256 {
2257   Mat               Ae, Be, Ce;
2258   Mat_MatMultDense *ab;
2259 
2260   PetscFunctionBegin;
2261   MatCheckProduct(C, 4);
2262   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2263   /* check local size of A and B */
2264   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2265     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2266   }
2267 
2268   /* create elemental matrices Ae and Be */
2269   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2270   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2271   PetscCall(MatSetType(Ae, MATELEMENTAL));
2272   PetscCall(MatSetUp(Ae));
2273   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2274 
2275   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2276   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2277   PetscCall(MatSetType(Be, MATELEMENTAL));
2278   PetscCall(MatSetUp(Be));
2279   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2280 
2281   /* compute symbolic Ce = Ae*Be */
2282   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2283   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2284 
2285   /* setup C */
2286   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2287   PetscCall(MatSetType(C, MATDENSE));
2288   PetscCall(MatSetUp(C));
2289 
2290   /* create data structure for reuse Cdense */
2291   PetscCall(PetscNew(&ab));
2292   ab->Ae = Ae;
2293   ab->Be = Be;
2294   ab->Ce = Ce;
2295 
2296   C->product->data       = ab;
2297   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2298   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2299   PetscFunctionReturn(PETSC_SUCCESS);
2300 }
2301 #endif
2302 
2303 #if defined(PETSC_HAVE_ELEMENTAL)
2304 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2305 {
2306   PetscFunctionBegin;
2307   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2308   C->ops->productsymbolic = MatProductSymbolic_AB;
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 #endif
2312 
2313 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2314 {
2315   Mat_Product *product = C->product;
2316   Mat          A = product->A, B = product->B;
2317 
2318   PetscFunctionBegin;
2319   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2320              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2321   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2322   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2323   PetscFunctionReturn(PETSC_SUCCESS);
2324 }
2325 
2326 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2327 {
2328   Mat_Product *product     = C->product;
2329   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2330   PetscInt     alg, nalg = 2;
2331   PetscBool    flg = PETSC_FALSE;
2332 
2333   PetscFunctionBegin;
2334   /* Set default algorithm */
2335   alg = 0; /* default is allgatherv */
2336   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2337   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2338 
2339   /* Get runtime option */
2340   if (product->api_user) {
2341     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2342     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2343     PetscOptionsEnd();
2344   } else {
2345     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2346     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2347     PetscOptionsEnd();
2348   }
2349   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2350 
2351   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2352   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2353   PetscFunctionReturn(PETSC_SUCCESS);
2354 }
2355 
/*
  Entry point that selects the product-specific setup routine for MPI dense
  matrices.  AB products are only available when PETSc was configured with
  Elemental; unsupported product types fall through unchanged so a generic
  handler elsewhere can reject them.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    /* other product types are not handled by this implementation */
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2378