xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3faff0630ddb0e5c928de8ab5c63f2cda2d2edd1)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
8baa3c1c6SHong Zhang #include <petscblaslapack.h>
98965ea79SLois Curfman McInnes 
10ab92ecdeSBarry Smith /*@
1111a5261eSBarry Smith       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
13ab92ecdeSBarry Smith 
14ab92ecdeSBarry Smith     Input Parameter:
1511a5261eSBarry Smith .      A - the sequential or MPI dense matrix
16ab92ecdeSBarry Smith 
17ab92ecdeSBarry Smith     Output Parameter:
18ab92ecdeSBarry Smith .      B - the inner matrix
19ab92ecdeSBarry Smith 
208e6c10adSSatish Balay     Level: intermediate
218e6c10adSSatish Balay 
2211a5261eSBarry Smith .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23ab92ecdeSBarry Smith @*/
249371c9d4SSatish Balay PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25ab92ecdeSBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26ace3abfcSBarry Smith   PetscBool     flg;
27ab92ecdeSBarry Smith 
28ab92ecdeSBarry Smith   PetscFunctionBegin;
29d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30d5ea218eSStefano Zampini   PetscValidPointer(B, 2);
319566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
322205254eSKarl Rupp   if (flg) *B = mat->A;
33f140bc17SStefano Zampini   else {
349566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
3528b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36f140bc17SStefano Zampini     *B = A;
37f140bc17SStefano Zampini   }
38ab92ecdeSBarry Smith   PetscFunctionReturn(0);
39ab92ecdeSBarry Smith }
40ab92ecdeSBarry Smith 
419371c9d4SSatish Balay PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42a76f77c3SStefano Zampini   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43a76f77c3SStefano Zampini   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44a76f77c3SStefano Zampini 
45a76f77c3SStefano Zampini   PetscFunctionBegin;
469566063dSJacob Faibussowitsch   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47a76f77c3SStefano Zampini   PetscFunctionReturn(0);
48a76f77c3SStefano Zampini }
49a76f77c3SStefano Zampini 
509371c9d4SSatish Balay PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
512f605a99SJose E. Roman   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
522f605a99SJose E. Roman   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
532f605a99SJose E. Roman   PetscScalar  *v;
542f605a99SJose E. Roman 
552f605a99SJose E. Roman   PetscFunctionBegin;
562f605a99SJose E. Roman   PetscCall(MatDenseGetArray(mat->A, &v));
572f605a99SJose E. Roman   PetscCall(MatDenseGetLDA(mat->A, &lda));
582f605a99SJose E. Roman   rend2 = PetscMin(rend, A->cmap->N);
592f605a99SJose E. Roman   if (rend2 > rstart) {
602f605a99SJose E. Roman     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
612f605a99SJose E. Roman     PetscCall(PetscLogFlops(rend2 - rstart));
622f605a99SJose E. Roman   }
632f605a99SJose E. Roman   PetscCall(MatDenseRestoreArray(mat->A, &v));
642f605a99SJose E. Roman   PetscFunctionReturn(0);
652f605a99SJose E. Roman }
662f605a99SJose E. Roman 
679371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68ba8c8a56SBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69d0f46423SBarry Smith   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70ba8c8a56SBarry Smith 
71ba8c8a56SBarry Smith   PetscFunctionBegin;
72aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73ba8c8a56SBarry Smith   lrow = row - rstart;
749566063dSJacob Faibussowitsch   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75ba8c8a56SBarry Smith   PetscFunctionReturn(0);
76ba8c8a56SBarry Smith }
77ba8c8a56SBarry Smith 
789371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79637a0070SStefano Zampini   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80637a0070SStefano Zampini   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81ba8c8a56SBarry Smith 
82ba8c8a56SBarry Smith   PetscFunctionBegin;
83aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84637a0070SStefano Zampini   lrow = row - rstart;
859566063dSJacob Faibussowitsch   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86ba8c8a56SBarry Smith   PetscFunctionReturn(0);
87ba8c8a56SBarry Smith }
88ba8c8a56SBarry Smith 
899371c9d4SSatish Balay PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
900de54da6SSatish Balay   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
91d0f46423SBarry Smith   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
9287828ca2SBarry Smith   PetscScalar  *array;
930de54da6SSatish Balay   MPI_Comm      comm;
944b6ae80fSStefano Zampini   PetscBool     flg;
9511bd1e4dSLisandro Dalcin   Mat           B;
960de54da6SSatish Balay 
970de54da6SSatish Balay   PetscFunctionBegin;
989566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &flg));
9928b400f6SJacob Faibussowitsch   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
1009566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
101616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
1024b6ae80fSStefano Zampini 
1039566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
10428b400f6SJacob Faibussowitsch     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
1059566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
1069566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &B));
1079566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, m, m, m, m));
1089566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
1099566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
1109566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
1119566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
1129566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
11311bd1e4dSLisandro Dalcin     *a = B;
1149566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
1152205254eSKarl Rupp   } else *a = B;
1160de54da6SSatish Balay   PetscFunctionReturn(0);
1170de54da6SSatish Balay }
1180de54da6SSatish Balay 
1199371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
12039b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
121d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
122ace3abfcSBarry Smith   PetscBool     roworiented = A->roworiented;
1238965ea79SLois Curfman McInnes 
1243a40ed3dSBarry Smith   PetscFunctionBegin;
1258965ea79SLois Curfman McInnes   for (i = 0; i < m; i++) {
1265ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
12708401ef6SPierre Jolivet     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
1288965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1298965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
13039b7565bSBarry Smith       if (roworiented) {
1319566063dSJacob Faibussowitsch         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
1323a40ed3dSBarry Smith       } else {
1338965ea79SLois Curfman McInnes         for (j = 0; j < n; j++) {
1345ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
13508401ef6SPierre Jolivet           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1369566063dSJacob Faibussowitsch           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
13739b7565bSBarry Smith         }
1388965ea79SLois Curfman McInnes       }
1392205254eSKarl Rupp     } else if (!A->donotstash) {
1405080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
14139b7565bSBarry Smith       if (roworiented) {
1429566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
143d36fbae8SSatish Balay       } else {
1449566063dSJacob Faibussowitsch         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
14539b7565bSBarry Smith       }
146b49de8d1SLois Curfman McInnes     }
147b49de8d1SLois Curfman McInnes   }
1483a40ed3dSBarry Smith   PetscFunctionReturn(0);
149b49de8d1SLois Curfman McInnes }
150b49de8d1SLois Curfman McInnes 
1519371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154b49de8d1SLois Curfman McInnes 
1553a40ed3dSBarry Smith   PetscFunctionBegin;
156b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
15754c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
15854c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
160b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
161b49de8d1SLois Curfman McInnes       for (j = 0; j < n; j++) {
16254c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
16354c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1649566063dSJacob Faibussowitsch         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165b49de8d1SLois Curfman McInnes       }
166e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
1678965ea79SLois Curfman McInnes   }
1683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1698965ea79SLois Curfman McInnes }
1708965ea79SLois Curfman McInnes 
1719371c9d4SSatish Balay static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
17249a6ff4bSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17349a6ff4bSBarry Smith 
17449a6ff4bSBarry Smith   PetscFunctionBegin;
1759566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, lda));
17649a6ff4bSBarry Smith   PetscFunctionReturn(0);
17749a6ff4bSBarry Smith }
17849a6ff4bSBarry Smith 
1799371c9d4SSatish Balay static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
180ad16ce7aSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
18117359960SJose E. Roman   PetscBool     iscuda;
182ad16ce7aSStefano Zampini 
183ad16ce7aSStefano Zampini   PetscFunctionBegin;
18417359960SJose E. Roman   if (!a->A) {
18528b400f6SJacob Faibussowitsch     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1869566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->rmap));
1879566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->cmap));
1889566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1899566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A));
1909566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1919566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1929566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
19317359960SJose E. Roman   }
1949566063dSJacob Faibussowitsch   PetscCall(MatDenseSetLDA(a->A, lda));
195ad16ce7aSStefano Zampini   PetscFunctionReturn(0);
196ad16ce7aSStefano Zampini }
197ad16ce7aSStefano Zampini 
1989371c9d4SSatish Balay static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
199ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
200ff14e315SSatish Balay 
2013a40ed3dSBarry Smith   PetscFunctionBegin;
20228b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2039566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, array));
2043a40ed3dSBarry Smith   PetscFunctionReturn(0);
205ff14e315SSatish Balay }
206ff14e315SSatish Balay 
2079371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
2088572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2098572280aSBarry Smith 
2108572280aSBarry Smith   PetscFunctionBegin;
21128b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2129566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, array));
2138572280aSBarry Smith   PetscFunctionReturn(0);
2148572280aSBarry Smith }
2158572280aSBarry Smith 
2169371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
2176947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2186947451fSStefano Zampini 
2196947451fSStefano Zampini   PetscFunctionBegin;
22028b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2219566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, array));
2226947451fSStefano Zampini   PetscFunctionReturn(0);
2236947451fSStefano Zampini }
2246947451fSStefano Zampini 
2259371c9d4SSatish Balay static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
226d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
227d3042a70SBarry Smith 
228d3042a70SBarry Smith   PetscFunctionBegin;
22928b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
23028b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2319566063dSJacob Faibussowitsch   PetscCall(MatDensePlaceArray(a->A, array));
232d3042a70SBarry Smith   PetscFunctionReturn(0);
233d3042a70SBarry Smith }
234d3042a70SBarry Smith 
2359371c9d4SSatish Balay static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
236d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
237d3042a70SBarry Smith 
238d3042a70SBarry Smith   PetscFunctionBegin;
23928b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
24028b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2419566063dSJacob Faibussowitsch   PetscCall(MatDenseResetArray(a->A));
242d3042a70SBarry Smith   PetscFunctionReturn(0);
243d3042a70SBarry Smith }
244d3042a70SBarry Smith 
2459371c9d4SSatish Balay static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
246d5ea218eSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
247d5ea218eSStefano Zampini 
248d5ea218eSStefano Zampini   PetscFunctionBegin;
24928b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
25028b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2519566063dSJacob Faibussowitsch   PetscCall(MatDenseReplaceArray(a->A, array));
252d5ea218eSStefano Zampini   PetscFunctionReturn(0);
253d5ea218eSStefano Zampini }
254d5ea218eSStefano Zampini 
2559371c9d4SSatish Balay static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
256ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
257637a0070SStefano Zampini   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
2585d0c19d7SBarry Smith   const PetscInt    *irow, *icol;
259637a0070SStefano Zampini   const PetscScalar *v;
260637a0070SStefano Zampini   PetscScalar       *bv;
261ca3fa75bSLois Curfman McInnes   Mat                newmat;
2624aa3045dSJed Brown   IS                 iscol_local;
26342a884f0SBarry Smith   MPI_Comm           comm_is, comm_mat;
264ca3fa75bSLois Curfman McInnes 
265ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
2669566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
2679566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
26808401ef6SPierre Jolivet   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
26942a884f0SBarry Smith 
2709566063dSJacob Faibussowitsch   PetscCall(ISAllGather(iscol, &iscol_local));
2719566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &irow));
2729566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &icol));
2739566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &nrows));
2749566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
2759566063dSJacob Faibussowitsch   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
276ca3fa75bSLois Curfman McInnes 
277ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2787eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2797eba5e9cSLois Curfman McInnes      original matrix! */
280ca3fa75bSLois Curfman McInnes 
2819566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
2829566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
283ca3fa75bSLois Curfman McInnes 
284ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
285ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
286e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2877eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
288ca3fa75bSLois Curfman McInnes     newmat = *B;
289ca3fa75bSLois Curfman McInnes   } else {
290ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
2919566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
2929566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
2939566063dSJacob Faibussowitsch     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
2949566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
295ca3fa75bSLois Curfman McInnes   }
296ca3fa75bSLois Curfman McInnes 
297ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
298ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense *)newmat->data;
2999566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(newmatd->A, &bv));
3009566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mat->A, &v));
3019566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mat->A, &lda));
3024aa3045dSJed Brown   for (i = 0; i < Ncols; i++) {
303637a0070SStefano Zampini     const PetscScalar *av = v + lda * icol[i];
304ad540459SPierre Jolivet     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
305ca3fa75bSLois Curfman McInnes   }
3069566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
3079566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
308ca3fa75bSLois Curfman McInnes 
309ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
3109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
3119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
312ca3fa75bSLois Curfman McInnes 
313ca3fa75bSLois Curfman McInnes   /* Free work space */
3149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &irow));
3159566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &icol));
3169566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&iscol_local));
317ca3fa75bSLois Curfman McInnes   *B = newmat;
318ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
319ca3fa75bSLois Curfman McInnes }
320ca3fa75bSLois Curfman McInnes 
3219371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
32273a71a0fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
32373a71a0fSBarry Smith 
3243a40ed3dSBarry Smith   PetscFunctionBegin;
3259566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, array));
3263a40ed3dSBarry Smith   PetscFunctionReturn(0);
327ff14e315SSatish Balay }
328ff14e315SSatish Balay 
3299371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
3308572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3318572280aSBarry Smith 
3328572280aSBarry Smith   PetscFunctionBegin;
3339566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, array));
3348572280aSBarry Smith   PetscFunctionReturn(0);
3358572280aSBarry Smith }
3368572280aSBarry Smith 
3379371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
3386947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3396947451fSStefano Zampini 
3406947451fSStefano Zampini   PetscFunctionBegin;
3419566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
3426947451fSStefano Zampini   PetscFunctionReturn(0);
3436947451fSStefano Zampini }
3446947451fSStefano Zampini 
3459371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
34639ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
34713f74950SBarry Smith   PetscInt      nstash, reallocs;
3488965ea79SLois Curfman McInnes 
3493a40ed3dSBarry Smith   PetscFunctionBegin;
3504067683fSStefano Zampini   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
3518965ea79SLois Curfman McInnes 
3529566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
3539566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
3549566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
3553a40ed3dSBarry Smith   PetscFunctionReturn(0);
3568965ea79SLois Curfman McInnes }
3578965ea79SLois Curfman McInnes 
3589371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
35939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
36013f74950SBarry Smith   PetscInt      i, *row, *col, flg, j, rstart, ncols;
36113f74950SBarry Smith   PetscMPIInt   n;
36287828ca2SBarry Smith   PetscScalar  *val;
3638965ea79SLois Curfman McInnes 
3643a40ed3dSBarry Smith   PetscFunctionBegin;
365910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
3668965ea79SLois Curfman McInnes     /*  wait on receives */
3677ef1d9bdSSatish Balay     while (1) {
3689566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
3697ef1d9bdSSatish Balay       if (!flg) break;
3708965ea79SLois Curfman McInnes 
3717ef1d9bdSSatish Balay       for (i = 0; i < n;) {
3727ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
3732205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
3742205254eSKarl Rupp           if (row[j] != rstart) break;
3752205254eSKarl Rupp         }
3767ef1d9bdSSatish Balay         if (j < n) ncols = j - i;
3777ef1d9bdSSatish Balay         else ncols = n - i;
3787ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
3799566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
3807ef1d9bdSSatish Balay         i = j;
3818965ea79SLois Curfman McInnes       }
3827ef1d9bdSSatish Balay     }
3839566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
384910cf402Sprj-   }
3858965ea79SLois Curfman McInnes 
3869566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mdn->A, mode));
3879566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mdn->A, mode));
3883a40ed3dSBarry Smith   PetscFunctionReturn(0);
3898965ea79SLois Curfman McInnes }
3908965ea79SLois Curfman McInnes 
3919371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
39239ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
3933a40ed3dSBarry Smith 
3943a40ed3dSBarry Smith   PetscFunctionBegin;
3959566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
3963a40ed3dSBarry Smith   PetscFunctionReturn(0);
3978965ea79SLois Curfman McInnes }
3988965ea79SLois Curfman McInnes 
3999371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
40039ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
401637a0070SStefano Zampini   PetscInt      i, len, *lrows;
402637a0070SStefano Zampini 
403637a0070SStefano Zampini   PetscFunctionBegin;
404637a0070SStefano Zampini   /* get locally owned rows */
4059566063dSJacob Faibussowitsch   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
406637a0070SStefano Zampini   /* fix right hand side if needed */
407637a0070SStefano Zampini   if (x && b) {
40897b48c8fSBarry Smith     const PetscScalar *xx;
40997b48c8fSBarry Smith     PetscScalar       *bb;
4108965ea79SLois Curfman McInnes 
4119566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
4129566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(b, &bb));
413637a0070SStefano Zampini     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
4149566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
4159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(b, &bb));
41697b48c8fSBarry Smith   }
4179566063dSJacob Faibussowitsch   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
418e2eb51b1SBarry Smith   if (diag != 0.0) {
419637a0070SStefano Zampini     Vec d;
420b9679d65SBarry Smith 
4219566063dSJacob Faibussowitsch     PetscCall(MatCreateVecs(A, NULL, &d));
4229566063dSJacob Faibussowitsch     PetscCall(VecSet(d, diag));
4239566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
4249566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&d));
425b9679d65SBarry Smith   }
4269566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
4273a40ed3dSBarry Smith   PetscFunctionReturn(0);
4288965ea79SLois Curfman McInnes }
4298965ea79SLois Curfman McInnes 
430cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
431cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
432cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
433cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
434cc2e6a90SBarry Smith 
4359371c9d4SSatish Balay PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
43639ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
437637a0070SStefano Zampini   const PetscScalar *ax;
438637a0070SStefano Zampini   PetscScalar       *ay;
439d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
440c456f294SBarry Smith 
4413a40ed3dSBarry Smith   PetscFunctionBegin;
4426f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4449566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4459566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4469566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4499566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
4503a40ed3dSBarry Smith   PetscFunctionReturn(0);
4518965ea79SLois Curfman McInnes }
4528965ea79SLois Curfman McInnes 
4539371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
45439ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
455637a0070SStefano Zampini   const PetscScalar *ax;
456637a0070SStefano Zampini   PetscScalar       *ay;
457d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
458c456f294SBarry Smith 
4593a40ed3dSBarry Smith   PetscFunctionBegin;
4606f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4639566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4649566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4679566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
4683a40ed3dSBarry Smith   PetscFunctionReturn(0);
4698965ea79SLois Curfman McInnes }
4708965ea79SLois Curfman McInnes 
4719371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
472096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
473637a0070SStefano Zampini   const PetscScalar *ax;
474637a0070SStefano Zampini   PetscScalar       *ay;
475d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
476096963f5SLois Curfman McInnes 
4773a40ed3dSBarry Smith   PetscFunctionBegin;
4786f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
4799566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
4809566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
4819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
4829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
4839566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
4849566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
4859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
4869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
4873a40ed3dSBarry Smith   PetscFunctionReturn(0);
488096963f5SLois Curfman McInnes }
489096963f5SLois Curfman McInnes 
4909371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
491096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
492637a0070SStefano Zampini   const PetscScalar *ax;
493637a0070SStefano Zampini   PetscScalar       *ay;
494d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
495096963f5SLois Curfman McInnes 
4963a40ed3dSBarry Smith   PetscFunctionBegin;
4976f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
4989566063dSJacob Faibussowitsch   PetscCall(VecCopy(yy, zz));
4999566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
5009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5019566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
5029566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5039566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
5063a40ed3dSBarry Smith   PetscFunctionReturn(0);
507096963f5SLois Curfman McInnes }
508096963f5SLois Curfman McInnes 
5099371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
51039ddd567SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
511637a0070SStefano Zampini   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
51287828ca2SBarry Smith   PetscScalar       *x, zero = 0.0;
513637a0070SStefano Zampini   const PetscScalar *av;
514ed3cc1f0SBarry Smith 
5153a40ed3dSBarry Smith   PetscFunctionBegin;
5169566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
5179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
5189566063dSJacob Faibussowitsch   PetscCall(VecGetSize(v, &n));
51908401ef6SPierre Jolivet   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
520d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
521d0f46423SBarry Smith   radd = A->rmap->rstart * m;
5229566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
5239566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
524ad540459SPierre Jolivet   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
5259566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
5269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
5273a40ed3dSBarry Smith   PetscFunctionReturn(0);
5288965ea79SLois Curfman McInnes }
5298965ea79SLois Curfman McInnes 
5309371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIDense(Mat mat) {
5313501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
532ed3cc1f0SBarry Smith 
5333a40ed3dSBarry Smith   PetscFunctionBegin;
534aa482453SBarry Smith #if defined(PETSC_USE_LOG)
535c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
5368965ea79SLois Curfman McInnes #endif
5379566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
53828b400f6SJacob Faibussowitsch   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
53928b400f6SJacob Faibussowitsch   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
5409566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->A));
5419566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->lvec));
5429566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mdn->Mvctx));
5439566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->cvec));
5449566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->cmat));
54501b82886SBarry Smith 
5469566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
5479566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
5488baccfbdSHong Zhang 
5499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
5509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
5519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
5529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
5539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
5549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
5559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
5569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
5579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
5589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
5599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
5609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
5619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
5628baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
5648baccfbdSHong Zhang #endif
565d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
5669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
567d24d4204SJose E. Roman #endif
5689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
5699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
5709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
5716718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
5729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
5739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
5742e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
5752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
5762e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
5772e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
5782e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
5792e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
5802e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
5812e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
5822e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
5832e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
5842e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
5852e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
5862e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
5872e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
5882e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
5896718818eSStefano Zampini #endif
5909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
5919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
5929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
5939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
5949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
5959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
5969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
5979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
5989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
5999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
600f8103e6bSStefano Zampini 
601f8103e6bSStefano Zampini   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
6023a40ed3dSBarry Smith   PetscFunctionReturn(0);
6038965ea79SLois Curfman McInnes }
60439ddd567SLois Curfman McInnes 
60552c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
60652c5f739Sprj- 
6079804daf3SBarry Smith #include <petscdraw.h>
6089371c9d4SSatish Balay static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
60939ddd567SLois Curfman McInnes   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
610616b8fbbSStefano Zampini   PetscMPIInt       rank;
61119fd82e9SBarry Smith   PetscViewerType   vtype;
612ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
613b0a32e0cSBarry Smith   PetscViewer       sviewer;
614f3ef73ceSBarry Smith   PetscViewerFormat format;
6158965ea79SLois Curfman McInnes 
6163a40ed3dSBarry Smith   PetscFunctionBegin;
6179566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
6189566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
6199566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
62032077d6dSBarry Smith   if (iascii) {
6219566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetType(viewer, &vtype));
6229566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
623456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6244e220ebcSLois Curfman McInnes       MatInfo info;
6259566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
6269566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
6279371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
6289371c9d4SSatish Balay                                                    (PetscInt)info.memory));
6299566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
6309566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
6316f08ad05SPierre Jolivet       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
6323a40ed3dSBarry Smith       PetscFunctionReturn(0);
633fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6343a40ed3dSBarry Smith       PetscFunctionReturn(0);
6358965ea79SLois Curfman McInnes     }
636f1af5d2fSBarry Smith   } else if (isdraw) {
637b0a32e0cSBarry Smith     PetscDraw draw;
638ace3abfcSBarry Smith     PetscBool isnull;
639f1af5d2fSBarry Smith 
6409566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
6419566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
642f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
643f1af5d2fSBarry Smith   }
64477ed5343SBarry Smith 
6457da1fb6eSBarry Smith   {
6468965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6478965ea79SLois Curfman McInnes     Mat          A;
648d0f46423SBarry Smith     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
649ba8c8a56SBarry Smith     PetscInt    *cols;
650ba8c8a56SBarry Smith     PetscScalar *vals;
6518965ea79SLois Curfman McInnes 
6529566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
653dd400576SPatrick Sanan     if (rank == 0) {
6549566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
6553a40ed3dSBarry Smith     } else {
6569566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
6578965ea79SLois Curfman McInnes     }
6587adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
6599566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPIDENSE));
6609566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
6619566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));
6628965ea79SLois Curfman McInnes 
66339ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
66439ddd567SLois Curfman McInnes        but it's quick for now */
66551022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6662205254eSKarl Rupp 
6672205254eSKarl Rupp     row = mat->rmap->rstart;
6682205254eSKarl Rupp     m   = mdn->A->rmap->n;
6698965ea79SLois Curfman McInnes     for (i = 0; i < m; i++) {
6709566063dSJacob Faibussowitsch       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
6719566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
6729566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
67339ddd567SLois Curfman McInnes       row++;
6748965ea79SLois Curfman McInnes     }
6758965ea79SLois Curfman McInnes 
6769566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
6779566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
6789566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
679dd400576SPatrick Sanan     if (rank == 0) {
6809566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
6819566063dSJacob Faibussowitsch       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
6828965ea79SLois Curfman McInnes     }
6839566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
6849566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
6859566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
6868965ea79SLois Curfman McInnes   }
6873a40ed3dSBarry Smith   PetscFunctionReturn(0);
6888965ea79SLois Curfman McInnes }
6898965ea79SLois Curfman McInnes 
6909371c9d4SSatish Balay PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
691ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw, issocket;
6928965ea79SLois Curfman McInnes 
693433994e6SBarry Smith   PetscFunctionBegin;
6949566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
6959566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
6969566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
6979566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
6980f5bd95cSBarry Smith 
69932077d6dSBarry Smith   if (iascii || issocket || isdraw) {
7009566063dSJacob Faibussowitsch     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
7011baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
7023a40ed3dSBarry Smith   PetscFunctionReturn(0);
7038965ea79SLois Curfman McInnes }
7048965ea79SLois Curfman McInnes 
7059371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
7063501a2bdSLois Curfman McInnes   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
7073501a2bdSLois Curfman McInnes   Mat            mdn = mat->A;
7083966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
7098965ea79SLois Curfman McInnes 
7103a40ed3dSBarry Smith   PetscFunctionBegin;
7114e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7122205254eSKarl Rupp 
7139566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
7142205254eSKarl Rupp 
7159371c9d4SSatish Balay   isend[0] = info->nz_used;
7169371c9d4SSatish Balay   isend[1] = info->nz_allocated;
7179371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
7189371c9d4SSatish Balay   isend[3] = info->memory;
7199371c9d4SSatish Balay   isend[4] = info->mallocs;
7208965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7214e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7224e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7234e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7244e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7254e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7268965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
7271c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
7282205254eSKarl Rupp 
7294e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7304e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7314e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7324e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7334e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7348965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
7351c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
7362205254eSKarl Rupp 
7374e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7384e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7394e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7404e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7414e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7428965ea79SLois Curfman McInnes   }
7434e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7444e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7454e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7463a40ed3dSBarry Smith   PetscFunctionReturn(0);
7478965ea79SLois Curfman McInnes }
7488965ea79SLois Curfman McInnes 
7499371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
75039ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
7518965ea79SLois Curfman McInnes 
7523a40ed3dSBarry Smith   PetscFunctionBegin;
75312c028f9SKris Buschelman   switch (op) {
754512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
75512c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
75612c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
75743674050SBarry Smith     MatCheckPreallocated(A, 1);
7589566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
75912c028f9SKris Buschelman     break;
76012c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
76143674050SBarry Smith     MatCheckPreallocated(A, 1);
7624e0d8c25SBarry Smith     a->roworiented = flg;
7639566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
76412c028f9SKris Buschelman     break;
7658c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
76613fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
76712c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
7689371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
7699371c9d4SSatish Balay   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
77077e54ba9SKris Buschelman   case MAT_SYMMETRIC:
77177e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7729a4540c5SBarry Smith   case MAT_HERMITIAN:
7739a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
774b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
775b94d7dedSBarry Smith   case MAT_SPD:
776600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
7775d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
778b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
779b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
7809566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
78177e54ba9SKris Buschelman     break;
7829371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
7833a40ed3dSBarry Smith   }
7843a40ed3dSBarry Smith   PetscFunctionReturn(0);
7858965ea79SLois Curfman McInnes }
7868965ea79SLois Curfman McInnes 
7879371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
7885b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
789637a0070SStefano Zampini   const PetscScalar *l;
790637a0070SStefano Zampini   PetscScalar        x, *v, *vv, *r;
791637a0070SStefano Zampini   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
7925b2fa520SLois Curfman McInnes 
7935b2fa520SLois Curfman McInnes   PetscFunctionBegin;
7949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(mdn->A, &vv));
7959566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mdn->A, &lda));
7969566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &s2, &s3));
7975b2fa520SLois Curfman McInnes   if (ll) {
7989566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s2a));
79908401ef6SPierre Jolivet     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
8009566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(ll, &l));
8015b2fa520SLois Curfman McInnes     for (i = 0; i < m; i++) {
8025b2fa520SLois Curfman McInnes       x = l[i];
803637a0070SStefano Zampini       v = vv + i;
8049371c9d4SSatish Balay       for (j = 0; j < n; j++) {
8059371c9d4SSatish Balay         (*v) *= x;
8069371c9d4SSatish Balay         v += lda;
8079371c9d4SSatish Balay       }
8085b2fa520SLois Curfman McInnes     }
8099566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(ll, &l));
8109566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
8115b2fa520SLois Curfman McInnes   }
8125b2fa520SLois Curfman McInnes   if (rr) {
813637a0070SStefano Zampini     const PetscScalar *ar;
814637a0070SStefano Zampini 
8159566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s3a));
81608401ef6SPierre Jolivet     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
8179566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(rr, &ar));
8186f08ad05SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
8199566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mdn->lvec, &r));
8209566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
8219566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
8229566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(rr, &ar));
8235b2fa520SLois Curfman McInnes     for (i = 0; i < n; i++) {
8245b2fa520SLois Curfman McInnes       x = r[i];
825637a0070SStefano Zampini       v = vv + i * lda;
8262205254eSKarl Rupp       for (j = 0; j < m; j++) (*v++) *= x;
8275b2fa520SLois Curfman McInnes     }
8289566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mdn->lvec, &r));
8299566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
8305b2fa520SLois Curfman McInnes   }
8319566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
8325b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8335b2fa520SLois Curfman McInnes }
8345b2fa520SLois Curfman McInnes 
8359371c9d4SSatish Balay PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
8363501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
83713f74950SBarry Smith   PetscInt           i, j;
838616b8fbbSStefano Zampini   PetscMPIInt        size;
839329f5518SBarry Smith   PetscReal          sum = 0.0;
840637a0070SStefano Zampini   const PetscScalar *av, *v;
8413501a2bdSLois Curfman McInnes 
8423a40ed3dSBarry Smith   PetscFunctionBegin;
8439566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
844637a0070SStefano Zampini   v = av;
8459566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
846616b8fbbSStefano Zampini   if (size == 1) {
8479566063dSJacob Faibussowitsch     PetscCall(MatNorm(mdn->A, type, nrm));
8483501a2bdSLois Curfman McInnes   } else {
8493501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
850d0f46423SBarry Smith       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
8519371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
8529371c9d4SSatish Balay         v++;
8533501a2bdSLois Curfman McInnes       }
8541c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
8558f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
8569566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
8573a40ed3dSBarry Smith     } else if (type == NORM_1) {
858329f5518SBarry Smith       PetscReal *tmp, *tmp2;
8599566063dSJacob Faibussowitsch       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
860064f8208SBarry Smith       *nrm = 0.0;
861637a0070SStefano Zampini       v    = av;
862d0f46423SBarry Smith       for (j = 0; j < mdn->A->cmap->n; j++) {
863d0f46423SBarry Smith         for (i = 0; i < mdn->A->rmap->n; i++) {
8649371c9d4SSatish Balay           tmp[j] += PetscAbsScalar(*v);
8659371c9d4SSatish Balay           v++;
8663501a2bdSLois Curfman McInnes         }
8673501a2bdSLois Curfman McInnes       }
8681c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
869d0f46423SBarry Smith       for (j = 0; j < A->cmap->N; j++) {
870064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8713501a2bdSLois Curfman McInnes       }
8729566063dSJacob Faibussowitsch       PetscCall(PetscFree2(tmp, tmp2));
8739566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
8743a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
875329f5518SBarry Smith       PetscReal ntemp;
8769566063dSJacob Faibussowitsch       PetscCall(MatNorm(mdn->A, type, &ntemp));
8771c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
878ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
8793501a2bdSLois Curfman McInnes   }
8809566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
8813a40ed3dSBarry Smith   PetscFunctionReturn(0);
8823501a2bdSLois Curfman McInnes }
8833501a2bdSLois Curfman McInnes 
8849371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
8853501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
8863501a2bdSLois Curfman McInnes   Mat           B;
887d0f46423SBarry Smith   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
888637a0070SStefano Zampini   PetscInt      j, i, lda;
88987828ca2SBarry Smith   PetscScalar  *v;
8903501a2bdSLois Curfman McInnes 
8913a40ed3dSBarry Smith   PetscFunctionBegin;
8927fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
893cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
8949566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
8959566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
8969566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
8979566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
898637a0070SStefano Zampini   } else B = *matout;
8993501a2bdSLois Curfman McInnes 
9009371c9d4SSatish Balay   m = a->A->rmap->n;
9019371c9d4SSatish Balay   n = a->A->cmap->n;
9029566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
9039566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
9049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rwork));
9053501a2bdSLois Curfman McInnes   for (i = 0; i < m; i++) rwork[i] = rstart + i;
9061acff37aSSatish Balay   for (j = 0; j < n; j++) {
9079566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
908637a0070SStefano Zampini     v += lda;
9093501a2bdSLois Curfman McInnes   }
9109566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
9119566063dSJacob Faibussowitsch   PetscCall(PetscFree(rwork));
9129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9139566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
914cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9153501a2bdSLois Curfman McInnes     *matout = B;
9163501a2bdSLois Curfman McInnes   } else {
9179566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
9183501a2bdSLois Curfman McInnes   }
9193a40ed3dSBarry Smith   PetscFunctionReturn(0);
920096963f5SLois Curfman McInnes }
921096963f5SLois Curfman McInnes 
9226849ba73SBarry Smith static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
92352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
9248965ea79SLois Curfman McInnes 
9259371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIDense(Mat A) {
926273d9f13SBarry Smith   PetscFunctionBegin;
9279566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
9289566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
92948a46eb9SPierre Jolivet   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
930273d9f13SBarry Smith   PetscFunctionReturn(0);
931273d9f13SBarry Smith }
932273d9f13SBarry Smith 
9339371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
934488007eeSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
935488007eeSBarry Smith 
936488007eeSBarry Smith   PetscFunctionBegin;
9379566063dSJacob Faibussowitsch   PetscCall(MatAXPY(A->A, alpha, B->A, str));
938488007eeSBarry Smith   PetscFunctionReturn(0);
939488007eeSBarry Smith }
940488007eeSBarry Smith 
9419371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIDense(Mat mat) {
942ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
943ba337c44SJed Brown 
944ba337c44SJed Brown   PetscFunctionBegin;
9459566063dSJacob Faibussowitsch   PetscCall(MatConjugate(a->A));
946ba337c44SJed Brown   PetscFunctionReturn(0);
947ba337c44SJed Brown }
948ba337c44SJed Brown 
9499371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIDense(Mat A) {
950ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
951ba337c44SJed Brown 
952ba337c44SJed Brown   PetscFunctionBegin;
9539566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
954ba337c44SJed Brown   PetscFunctionReturn(0);
955ba337c44SJed Brown }
956ba337c44SJed Brown 
9579371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
958ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959ba337c44SJed Brown 
960ba337c44SJed Brown   PetscFunctionBegin;
9619566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
962ba337c44SJed Brown   PetscFunctionReturn(0);
963ba337c44SJed Brown }
964ba337c44SJed Brown 
9659371c9d4SSatish Balay static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
966637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
96749a6ff4bSBarry Smith 
96849a6ff4bSBarry Smith   PetscFunctionBegin;
96928b400f6SJacob Faibussowitsch   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
97028b400f6SJacob Faibussowitsch   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
9719566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
97249a6ff4bSBarry Smith   PetscFunctionReturn(0);
97349a6ff4bSBarry Smith }
97449a6ff4bSBarry Smith 
975857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
97652c5f739Sprj- 
9779371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
978a873a8cdSSam Reynolds   PetscInt      i, m, n;
9790716a85fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
9800716a85fSBarry Smith   PetscReal    *work;
9810716a85fSBarry Smith 
9820716a85fSBarry Smith   PetscFunctionBegin;
9839566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
9849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &work));
985857cbf51SRichard Tran Mills   if (type == REDUCTION_MEAN_REALPART) {
9869566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
987857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
9889566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
989a873a8cdSSam Reynolds   } else {
9909566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
991a873a8cdSSam Reynolds   }
992857cbf51SRichard Tran Mills   if (type == NORM_2) {
9930716a85fSBarry Smith     for (i = 0; i < n; i++) work[i] *= work[i];
9940716a85fSBarry Smith   }
995857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
9961c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
9970716a85fSBarry Smith   } else {
9981c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
9990716a85fSBarry Smith   }
10009566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
1001857cbf51SRichard Tran Mills   if (type == NORM_2) {
1002a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1003857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1004a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
10050716a85fSBarry Smith   }
10060716a85fSBarry Smith   PetscFunctionReturn(0);
10070716a85fSBarry Smith }
10080716a85fSBarry Smith 
1009637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
10109371c9d4SSatish Balay PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
1011126e4e93SJose E. Roman   PetscScalar *da;
1012126e4e93SJose E. Roman   PetscInt     lda;
1013126e4e93SJose E. Roman 
1014126e4e93SJose E. Roman   PetscFunctionBegin;
1015126e4e93SJose E. Roman   PetscCall(MatDenseCUDAGetArray(A, &da));
1016126e4e93SJose E. Roman   PetscCall(MatDenseGetLDA(A, &lda));
1017126e4e93SJose E. Roman   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1018126e4e93SJose E. Roman   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1019126e4e93SJose E. Roman   PetscCall(MatDenseCUDARestoreArray(A, &da));
1020126e4e93SJose E. Roman   PetscFunctionReturn(0);
1021126e4e93SJose E. Roman }
1022126e4e93SJose E. Roman 
10239371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10246947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10256947451fSStefano Zampini   PetscInt      lda;
10266947451fSStefano Zampini 
10276947451fSStefano Zampini   PetscFunctionBegin;
102828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
102928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
10306947451fSStefano Zampini   if (!a->cvec) {
10319566063dSJacob Faibussowitsch     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
10329566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
10336947451fSStefano Zampini   }
10346947451fSStefano Zampini   a->vecinuse = col + 1;
10359566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10369566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
10379566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
10386947451fSStefano Zampini   *v = a->cvec;
10396947451fSStefano Zampini   PetscFunctionReturn(0);
10406947451fSStefano Zampini }
10416947451fSStefano Zampini 
10429371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10436947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10446947451fSStefano Zampini 
10456947451fSStefano Zampini   PetscFunctionBegin;
104628b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
104728b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
10486947451fSStefano Zampini   a->vecinuse = 0;
10499566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
10509566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1051742765d3SMatthew Knepley   if (v) *v = NULL;
10526947451fSStefano Zampini   PetscFunctionReturn(0);
10536947451fSStefano Zampini }
10546947451fSStefano Zampini 
10559371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10566947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10576947451fSStefano Zampini   PetscInt      lda;
10586947451fSStefano Zampini 
10596947451fSStefano Zampini   PetscFunctionBegin;
106028b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
106128b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
10626947451fSStefano Zampini   if (!a->cvec) {
10639566063dSJacob Faibussowitsch     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
10649566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
10656947451fSStefano Zampini   }
10666947451fSStefano Zampini   a->vecinuse = col + 1;
10679566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10689566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
10699566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
10709566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
10716947451fSStefano Zampini   *v = a->cvec;
10726947451fSStefano Zampini   PetscFunctionReturn(0);
10736947451fSStefano Zampini }
10746947451fSStefano Zampini 
10759371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10766947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10776947451fSStefano Zampini 
10786947451fSStefano Zampini   PetscFunctionBegin;
107928b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
108028b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
10816947451fSStefano Zampini   a->vecinuse = 0;
10829566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
10839566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
10849566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1085742765d3SMatthew Knepley   if (v) *v = NULL;
10866947451fSStefano Zampini   PetscFunctionReturn(0);
10876947451fSStefano Zampini }
10886947451fSStefano Zampini 
10899371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10906947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10916947451fSStefano Zampini   PetscInt      lda;
10926947451fSStefano Zampini 
10936947451fSStefano Zampini   PetscFunctionBegin;
109428b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
109528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
10966947451fSStefano Zampini   if (!a->cvec) {
10979566063dSJacob Faibussowitsch     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
10989566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
10996947451fSStefano Zampini   }
11006947451fSStefano Zampini   a->vecinuse = col + 1;
11019566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
11029566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
11039566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
11046947451fSStefano Zampini   *v = a->cvec;
11056947451fSStefano Zampini   PetscFunctionReturn(0);
11066947451fSStefano Zampini }
11076947451fSStefano Zampini 
11089371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
11096947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
11106947451fSStefano Zampini 
11116947451fSStefano Zampini   PetscFunctionBegin;
111228b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
111328b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
11146947451fSStefano Zampini   a->vecinuse = 0;
11159566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
11169566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1117742765d3SMatthew Knepley   if (v) *v = NULL;
11186947451fSStefano Zampini   PetscFunctionReturn(0);
11196947451fSStefano Zampini }
11206947451fSStefano Zampini 
11219371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1122637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1123637a0070SStefano Zampini 
1124637a0070SStefano Zampini   PetscFunctionBegin;
112528b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
112628b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11279566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1128637a0070SStefano Zampini   PetscFunctionReturn(0);
1129637a0070SStefano Zampini }
1130637a0070SStefano Zampini 
11319371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
1132637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1133637a0070SStefano Zampini 
1134637a0070SStefano Zampini   PetscFunctionBegin;
113528b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
113628b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11379566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAResetArray(l->A));
1138637a0070SStefano Zampini   PetscFunctionReturn(0);
1139637a0070SStefano Zampini }
1140637a0070SStefano Zampini 
11419371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1142d5ea218eSStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1143d5ea218eSStefano Zampini 
1144d5ea218eSStefano Zampini   PetscFunctionBegin;
114528b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
114628b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11479566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1148d5ea218eSStefano Zampini   PetscFunctionReturn(0);
1149d5ea218eSStefano Zampini }
1150d5ea218eSStefano Zampini 
11519371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1152637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1153637a0070SStefano Zampini 
1154637a0070SStefano Zampini   PetscFunctionBegin;
11559566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1156637a0070SStefano Zampini   PetscFunctionReturn(0);
1157637a0070SStefano Zampini }
1158637a0070SStefano Zampini 
11599371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1160637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1161637a0070SStefano Zampini 
1162637a0070SStefano Zampini   PetscFunctionBegin;
11639566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1164637a0070SStefano Zampini   PetscFunctionReturn(0);
1165637a0070SStefano Zampini }
1166637a0070SStefano Zampini 
11679371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1168637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1169637a0070SStefano Zampini 
1170637a0070SStefano Zampini   PetscFunctionBegin;
11719566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1172637a0070SStefano Zampini   PetscFunctionReturn(0);
1173637a0070SStefano Zampini }
1174637a0070SStefano Zampini 
11759371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1176637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1177637a0070SStefano Zampini 
1178637a0070SStefano Zampini   PetscFunctionBegin;
11799566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1180637a0070SStefano Zampini   PetscFunctionReturn(0);
1181637a0070SStefano Zampini }
1182637a0070SStefano Zampini 
11839371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1184637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1185637a0070SStefano Zampini 
1186637a0070SStefano Zampini   PetscFunctionBegin;
11879566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArray(l->A, a));
1188637a0070SStefano Zampini   PetscFunctionReturn(0);
1189637a0070SStefano Zampini }
1190637a0070SStefano Zampini 
11919371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1192637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1193637a0070SStefano Zampini 
1194637a0070SStefano Zampini   PetscFunctionBegin;
11959566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1196637a0070SStefano Zampini   PetscFunctionReturn(0);
1197637a0070SStefano Zampini }
1198637a0070SStefano Zampini 
11996947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
12006947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
12016947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
12026947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
12036947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
12046947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
12055ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
12066947451fSStefano Zampini 
12079371c9d4SSatish Balay static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
1208637a0070SStefano Zampini   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1209637a0070SStefano Zampini 
1210637a0070SStefano Zampini   PetscFunctionBegin;
121128b400f6SJacob Faibussowitsch   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
121228b400f6SJacob Faibussowitsch   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
12131baa6e33SBarry Smith   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1214637a0070SStefano Zampini   mat->boundtocpu = bind;
12156947451fSStefano Zampini   if (!bind) {
12166947451fSStefano Zampini     PetscBool iscuda;
12176947451fSStefano Zampini 
1218*3faff063SStefano Zampini     PetscCall(PetscFree(mat->defaultrandtype));
1219*3faff063SStefano Zampini     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
12209566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
122148a46eb9SPierre Jolivet     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
12229566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
122348a46eb9SPierre Jolivet     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
12249566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
12259566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
12269566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
12279566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
12289566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
12299566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1230126e4e93SJose E. Roman     mat->ops->shift = MatShift_MPIDenseCUDA;
12316947451fSStefano Zampini   } else {
1232*3faff063SStefano Zampini     PetscCall(PetscFree(mat->defaultrandtype));
1233*3faff063SStefano Zampini     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
12349566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
12359566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
12369566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
12379566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
12389566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
12399566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1240126e4e93SJose E. Roman     mat->ops->shift = MatShift_MPIDense;
1241616b8fbbSStefano Zampini   }
12421baa6e33SBarry Smith   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1243637a0070SStefano Zampini   PetscFunctionReturn(0);
1244637a0070SStefano Zampini }
1245637a0070SStefano Zampini 
12469371c9d4SSatish Balay PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
1247637a0070SStefano Zampini   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1248637a0070SStefano Zampini   PetscBool     iscuda;
1249637a0070SStefano Zampini 
1250637a0070SStefano Zampini   PetscFunctionBegin;
1251d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
12529566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1253637a0070SStefano Zampini   if (!iscuda) PetscFunctionReturn(0);
12549566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12559566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1256637a0070SStefano Zampini   if (!d->A) {
12579566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
12589566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
12599566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1260637a0070SStefano Zampini   }
12619566063dSJacob Faibussowitsch   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
12629566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1263637a0070SStefano Zampini   A->preallocated = PETSC_TRUE;
12646f08ad05SPierre Jolivet   A->assembled    = PETSC_TRUE;
1265637a0070SStefano Zampini   PetscFunctionReturn(0);
1266637a0070SStefano Zampini }
1267637a0070SStefano Zampini #endif
1268637a0070SStefano Zampini 
12699371c9d4SSatish Balay static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
127073a71a0fSBarry Smith   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
127173a71a0fSBarry Smith 
127273a71a0fSBarry Smith   PetscFunctionBegin;
12739566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(d->A, rctx));
1274*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
1275*3faff063SStefano Zampini   x->offloadmask = d->A->offloadmask;
1276*3faff063SStefano Zampini #endif
127773a71a0fSBarry Smith   PetscFunctionReturn(0);
127873a71a0fSBarry Smith }
127973a71a0fSBarry Smith 
12809371c9d4SSatish Balay static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
12813b49f96aSBarry Smith   PetscFunctionBegin;
12823b49f96aSBarry Smith   *missing = PETSC_FALSE;
12833b49f96aSBarry Smith   PetscFunctionReturn(0);
12843b49f96aSBarry Smith }
12853b49f96aSBarry Smith 
12864222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1287cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12886718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1289a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12906718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
12916718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1292cc48ffa7SToby Isaac 
12938965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
129409dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
129509dc0095SBarry Smith                                        MatGetRow_MPIDense,
129609dc0095SBarry Smith                                        MatRestoreRow_MPIDense,
129709dc0095SBarry Smith                                        MatMult_MPIDense,
129897304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPIDense,
12997c922b88SBarry Smith                                        MatMultTranspose_MPIDense,
13007c922b88SBarry Smith                                        MatMultTransposeAdd_MPIDense,
1301f4259b30SLisandro Dalcin                                        NULL,
1302f4259b30SLisandro Dalcin                                        NULL,
1303f4259b30SLisandro Dalcin                                        NULL,
1304f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1305f4259b30SLisandro Dalcin                                        NULL,
1306f4259b30SLisandro Dalcin                                        NULL,
1307f4259b30SLisandro Dalcin                                        NULL,
130809dc0095SBarry Smith                                        MatTranspose_MPIDense,
130997304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPIDense,
13106e4ee0c6SHong Zhang                                        MatEqual_MPIDense,
131109dc0095SBarry Smith                                        MatGetDiagonal_MPIDense,
13125b2fa520SLois Curfman McInnes                                        MatDiagonalScale_MPIDense,
131309dc0095SBarry Smith                                        MatNorm_MPIDense,
131497304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPIDense,
131509dc0095SBarry Smith                                        MatAssemblyEnd_MPIDense,
131609dc0095SBarry Smith                                        MatSetOption_MPIDense,
131709dc0095SBarry Smith                                        MatZeroEntries_MPIDense,
1318d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_MPIDense,
1319f4259b30SLisandro Dalcin                                        NULL,
1320f4259b30SLisandro Dalcin                                        NULL,
1321f4259b30SLisandro Dalcin                                        NULL,
1322f4259b30SLisandro Dalcin                                        NULL,
13234994cf47SJed Brown                                        /* 29*/ MatSetUp_MPIDense,
1324f4259b30SLisandro Dalcin                                        NULL,
1325f4259b30SLisandro Dalcin                                        NULL,
1326c56a70eeSBarry Smith                                        MatGetDiagonalBlock_MPIDense,
1327f4259b30SLisandro Dalcin                                        NULL,
1328d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPIDense,
1329f4259b30SLisandro Dalcin                                        NULL,
1330f4259b30SLisandro Dalcin                                        NULL,
1331f4259b30SLisandro Dalcin                                        NULL,
1332f4259b30SLisandro Dalcin                                        NULL,
1333d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPIDense,
13347dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIDense,
1335f4259b30SLisandro Dalcin                                        NULL,
133609dc0095SBarry Smith                                        MatGetValues_MPIDense,
1337a76f77c3SStefano Zampini                                        MatCopy_MPIDense,
1338f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
133909dc0095SBarry Smith                                        MatScale_MPIDense,
13402f605a99SJose E. Roman                                        MatShift_MPIDense,
1341f4259b30SLisandro Dalcin                                        NULL,
1342f4259b30SLisandro Dalcin                                        NULL,
134373a71a0fSBarry Smith                                        /* 49*/ MatSetRandom_MPIDense,
1344f4259b30SLisandro Dalcin                                        NULL,
1345f4259b30SLisandro Dalcin                                        NULL,
1346f4259b30SLisandro Dalcin                                        NULL,
1347f4259b30SLisandro Dalcin                                        NULL,
1348f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1349f4259b30SLisandro Dalcin                                        NULL,
1350f4259b30SLisandro Dalcin                                        NULL,
1351f4259b30SLisandro Dalcin                                        NULL,
1352f4259b30SLisandro Dalcin                                        NULL,
13537dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1354b9b97703SBarry Smith                                        MatDestroy_MPIDense,
1355b9b97703SBarry Smith                                        MatView_MPIDense,
1356f4259b30SLisandro Dalcin                                        NULL,
1357f4259b30SLisandro Dalcin                                        NULL,
1358f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1359f4259b30SLisandro Dalcin                                        NULL,
1360f4259b30SLisandro Dalcin                                        NULL,
1361f4259b30SLisandro Dalcin                                        NULL,
1362f4259b30SLisandro Dalcin                                        NULL,
1363f4259b30SLisandro Dalcin                                        /* 69*/ NULL,
1364f4259b30SLisandro Dalcin                                        NULL,
1365f4259b30SLisandro Dalcin                                        NULL,
1366f4259b30SLisandro Dalcin                                        NULL,
1367f4259b30SLisandro Dalcin                                        NULL,
1368f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1369f4259b30SLisandro Dalcin                                        NULL,
1370f4259b30SLisandro Dalcin                                        NULL,
1371f4259b30SLisandro Dalcin                                        NULL,
1372f4259b30SLisandro Dalcin                                        NULL,
1373f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1374f4259b30SLisandro Dalcin                                        NULL,
1375f4259b30SLisandro Dalcin                                        NULL,
1376f4259b30SLisandro Dalcin                                        NULL,
13775bba2384SShri Abhyankar                                        /* 83*/ MatLoad_MPIDense,
1378f4259b30SLisandro Dalcin                                        NULL,
1379f4259b30SLisandro Dalcin                                        NULL,
1380f4259b30SLisandro Dalcin                                        NULL,
1381f4259b30SLisandro Dalcin                                        NULL,
1382f4259b30SLisandro Dalcin                                        NULL,
1383f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1384f4259b30SLisandro Dalcin                                        NULL,
1385f4259b30SLisandro Dalcin                                        NULL,
1386f4259b30SLisandro Dalcin                                        NULL,
1387f4259b30SLisandro Dalcin                                        NULL,
1388f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1389f4259b30SLisandro Dalcin                                        NULL,
13906718818eSStefano Zampini                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1391cc48ffa7SToby Isaac                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1392f4259b30SLisandro Dalcin                                        NULL,
13934222ddf1SHong Zhang                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1394f4259b30SLisandro Dalcin                                        NULL,
1395f4259b30SLisandro Dalcin                                        NULL,
1396ba337c44SJed Brown                                        MatConjugate_MPIDense,
1397f4259b30SLisandro Dalcin                                        NULL,
1398f4259b30SLisandro Dalcin                                        /*104*/ NULL,
1399ba337c44SJed Brown                                        MatRealPart_MPIDense,
1400ba337c44SJed Brown                                        MatImaginaryPart_MPIDense,
1401f4259b30SLisandro Dalcin                                        NULL,
1402f4259b30SLisandro Dalcin                                        NULL,
1403f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1404f4259b30SLisandro Dalcin                                        NULL,
1405f4259b30SLisandro Dalcin                                        NULL,
140649a6ff4bSBarry Smith                                        MatGetColumnVector_MPIDense,
14073b49f96aSBarry Smith                                        MatMissingDiagonal_MPIDense,
1408f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1409f4259b30SLisandro Dalcin                                        NULL,
1410f4259b30SLisandro Dalcin                                        NULL,
1411f4259b30SLisandro Dalcin                                        NULL,
1412f4259b30SLisandro Dalcin                                        NULL,
1413f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1414f4259b30SLisandro Dalcin                                        NULL,
1415f4259b30SLisandro Dalcin                                        NULL,
1416f4259b30SLisandro Dalcin                                        NULL,
1417f4259b30SLisandro Dalcin                                        NULL,
1418f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1419a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIDense,
1420f4259b30SLisandro Dalcin                                        NULL,
1421f4259b30SLisandro Dalcin                                        NULL,
1422f4259b30SLisandro Dalcin                                        NULL,
1423f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1424f4259b30SLisandro Dalcin                                        NULL,
14256718818eSStefano Zampini                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1426cb20be35SHong Zhang                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1427f4259b30SLisandro Dalcin                                        NULL,
1428f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1429f4259b30SLisandro Dalcin                                        NULL,
1430f4259b30SLisandro Dalcin                                        NULL,
1431f4259b30SLisandro Dalcin                                        NULL,
1432f4259b30SLisandro Dalcin                                        NULL,
1433f4259b30SLisandro Dalcin                                        /*139*/ NULL,
1434f4259b30SLisandro Dalcin                                        NULL,
1435f4259b30SLisandro Dalcin                                        NULL,
1436f4259b30SLisandro Dalcin                                        NULL,
1437f4259b30SLisandro Dalcin                                        NULL,
14384222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1439f4259b30SLisandro Dalcin                                        /*145*/ NULL,
1440f4259b30SLisandro Dalcin                                        NULL,
144199a7f59eSMark Adams                                        NULL,
144299a7f59eSMark Adams                                        NULL,
14437fb60732SBarry Smith                                        NULL,
14449371c9d4SSatish Balay                                        /*150*/ NULL};
14458965ea79SLois Curfman McInnes 
14469371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
1447637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1448637a0070SStefano Zampini   PetscBool     iscuda;
1449a23d5eceSKris Buschelman 
1450a23d5eceSKris Buschelman   PetscFunctionBegin;
145128b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
14529566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
14539566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
1454637a0070SStefano Zampini   if (!a->A) {
14559566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
14569566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
14579566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1458637a0070SStefano Zampini   }
14599566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
14609566063dSJacob Faibussowitsch   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
14619566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1462*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
1463*3faff063SStefano Zampini   mat->offloadmask = a->A->offloadmask;
1464*3faff063SStefano Zampini #endif
1465637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
14666f08ad05SPierre Jolivet   mat->assembled    = PETSC_TRUE;
1467a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1468a23d5eceSKris Buschelman }
1469a23d5eceSKris Buschelman 
14709371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
14714e29119aSPierre Jolivet   Mat B, C;
14724e29119aSPierre Jolivet 
14734e29119aSPierre Jolivet   PetscFunctionBegin;
14749566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
14759566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
14769566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&C));
14774e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14784e29119aSPierre Jolivet     C = *newmat;
14794e29119aSPierre Jolivet   } else C = NULL;
14809566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14819566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14824e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14839566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14844e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14854e29119aSPierre Jolivet   PetscFunctionReturn(0);
14864e29119aSPierre Jolivet }
14874e29119aSPierre Jolivet 
14889371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
14894e29119aSPierre Jolivet   Mat B, C;
14904e29119aSPierre Jolivet 
14914e29119aSPierre Jolivet   PetscFunctionBegin;
14929566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLocalMatrix(A, &C));
14939566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
14944e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14954e29119aSPierre Jolivet     C = *newmat;
14964e29119aSPierre Jolivet   } else C = NULL;
14979566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14989566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14994e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
15009566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
15014e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
15024e29119aSPierre Jolivet   PetscFunctionReturn(0);
15034e29119aSPierre Jolivet }
15044e29119aSPierre Jolivet 
150565b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
15069371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
15078ea901baSHong Zhang   Mat          mat_elemental;
150832d7a744SHong Zhang   PetscScalar *v;
150932d7a744SHong Zhang   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
15108ea901baSHong Zhang 
15118baccfbdSHong Zhang   PetscFunctionBegin;
1512378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1513378336b6SHong Zhang     mat_elemental = *newmat;
15149566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(*newmat));
1515378336b6SHong Zhang   } else {
15169566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
15179566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
15189566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
15199566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_elemental));
15209566063dSJacob Faibussowitsch     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1521378336b6SHong Zhang   }
1522378336b6SHong Zhang 
15239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &rows, N, &cols));
152432d7a744SHong Zhang   for (i = 0; i < N; i++) cols[i] = i;
152532d7a744SHong Zhang   for (i = 0; i < m; i++) rows[i] = rstart + i;
15268ea901baSHong Zhang 
1527637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
15289566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(A, &v));
15299566063dSJacob Faibussowitsch   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
15309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
15319566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
15329566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &v));
15339566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, cols));
15348ea901baSHong Zhang 
1535511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
15369566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &mat_elemental));
15378ea901baSHong Zhang   } else {
15388ea901baSHong Zhang     *newmat = mat_elemental;
15398ea901baSHong Zhang   }
15408baccfbdSHong Zhang   PetscFunctionReturn(0);
15418baccfbdSHong Zhang }
154265b80a83SHong Zhang #endif
15438baccfbdSHong Zhang 
15449371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
154586aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
154686aefd0dSHong Zhang 
154786aefd0dSHong Zhang   PetscFunctionBegin;
15489566063dSJacob Faibussowitsch   PetscCall(MatDenseGetColumn(mat->A, col, vals));
154986aefd0dSHong Zhang   PetscFunctionReturn(0);
155086aefd0dSHong Zhang }
155186aefd0dSHong Zhang 
15529371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
155386aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
155486aefd0dSHong Zhang 
155586aefd0dSHong Zhang   PetscFunctionBegin;
15569566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreColumn(mat->A, vals));
155786aefd0dSHong Zhang   PetscFunctionReturn(0);
155886aefd0dSHong Zhang }
155986aefd0dSHong Zhang 
15609371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
156194e2cb23SJakub Kruzik   Mat_MPIDense *mat;
156294e2cb23SJakub Kruzik   PetscInt      m, nloc, N;
156394e2cb23SJakub Kruzik 
156494e2cb23SJakub Kruzik   PetscFunctionBegin;
15659566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
15669566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
156794e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
156894e2cb23SJakub Kruzik     PetscInt sum;
156994e2cb23SJakub Kruzik 
157048a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
157194e2cb23SJakub Kruzik     /* Check sum(n) = N */
15721c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
157308401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
157494e2cb23SJakub Kruzik 
15759566063dSJacob Faibussowitsch     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
15769566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
157794e2cb23SJakub Kruzik   }
157894e2cb23SJakub Kruzik 
157994e2cb23SJakub Kruzik   /* numeric phase */
158094e2cb23SJakub Kruzik   mat = (Mat_MPIDense *)(*outmat)->data;
15819566063dSJacob Faibussowitsch   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
158294e2cb23SJakub Kruzik   PetscFunctionReturn(0);
158394e2cb23SJakub Kruzik }
158494e2cb23SJakub Kruzik 
1585637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
15869371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
1587637a0070SStefano Zampini   Mat           B;
1588637a0070SStefano Zampini   Mat_MPIDense *m;
1589637a0070SStefano Zampini 
1590637a0070SStefano Zampini   PetscFunctionBegin;
1591637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
15929566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1593637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
15949566063dSJacob Faibussowitsch     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1595637a0070SStefano Zampini   }
1596637a0070SStefano Zampini 
1597637a0070SStefano Zampini   B = *newmat;
15989566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
15999566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->defaultvectype));
16009566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
16019566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
16029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
16039566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
16049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
16059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
16069566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
16079566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
16089566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
16099566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
16109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
16119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
16129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
16139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
16149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
16159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1616637a0070SStefano Zampini   m = (Mat_MPIDense *)(B)->data;
161748a46eb9SPierre Jolivet   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1618637a0070SStefano Zampini   B->ops->bindtocpu = NULL;
1619637a0070SStefano Zampini   B->offloadmask    = PETSC_OFFLOAD_CPU;
1620637a0070SStefano Zampini   PetscFunctionReturn(0);
1621637a0070SStefano Zampini }
1622637a0070SStefano Zampini 
16239371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
1624637a0070SStefano Zampini   Mat           B;
1625637a0070SStefano Zampini   Mat_MPIDense *m;
1626637a0070SStefano Zampini 
1627637a0070SStefano Zampini   PetscFunctionBegin;
1628637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
16299566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1630637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
16319566063dSJacob Faibussowitsch     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1632637a0070SStefano Zampini   }
1633637a0070SStefano Zampini 
1634637a0070SStefano Zampini   B = *newmat;
16359566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->defaultvectype));
16369566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
16379566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
16389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
16399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
16409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
16419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
16429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
16439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
16449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
16459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
16469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
16479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
16489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
16499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
16509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
16519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
1652f5ff9c66SStefano Zampini   m = (Mat_MPIDense *)(B->data);
1653637a0070SStefano Zampini   if (m->A) {
16549566063dSJacob Faibussowitsch     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
1655637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_BOTH;
1656637a0070SStefano Zampini   } else {
1657637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1658637a0070SStefano Zampini   }
16599566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
1660637a0070SStefano Zampini 
1661637a0070SStefano Zampini   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1662637a0070SStefano Zampini   PetscFunctionReturn(0);
1663637a0070SStefano Zampini }
1664637a0070SStefano Zampini #endif
1665637a0070SStefano Zampini 
16669371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
16676947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16686947451fSStefano Zampini   PetscInt      lda;
16696947451fSStefano Zampini 
16706947451fSStefano Zampini   PetscFunctionBegin;
167128b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
167228b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
167348a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
16746947451fSStefano Zampini   a->vecinuse = col + 1;
16759566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
16769566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
16779566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
16786947451fSStefano Zampini   *v = a->cvec;
16796947451fSStefano Zampini   PetscFunctionReturn(0);
16806947451fSStefano Zampini }
16816947451fSStefano Zampini 
16829371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
16836947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16846947451fSStefano Zampini 
16856947451fSStefano Zampini   PetscFunctionBegin;
168628b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
168728b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
16886947451fSStefano Zampini   a->vecinuse = 0;
16899566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
16909566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1691742765d3SMatthew Knepley   if (v) *v = NULL;
16926947451fSStefano Zampini   PetscFunctionReturn(0);
16936947451fSStefano Zampini }
16946947451fSStefano Zampini 
16959371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
16966947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16976947451fSStefano Zampini   PetscInt      lda;
16986947451fSStefano Zampini 
16996947451fSStefano Zampini   PetscFunctionBegin;
170028b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
170128b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
170248a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
17036947451fSStefano Zampini   a->vecinuse = col + 1;
17049566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
17059566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
17069566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
17079566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
17086947451fSStefano Zampini   *v = a->cvec;
17096947451fSStefano Zampini   PetscFunctionReturn(0);
17106947451fSStefano Zampini }
17116947451fSStefano Zampini 
17129371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
17136947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17146947451fSStefano Zampini 
17156947451fSStefano Zampini   PetscFunctionBegin;
17165f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
17175f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
17186947451fSStefano Zampini   a->vecinuse = 0;
17199566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
17209566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
17219566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1722742765d3SMatthew Knepley   if (v) *v = NULL;
17236947451fSStefano Zampini   PetscFunctionReturn(0);
17246947451fSStefano Zampini }
17256947451fSStefano Zampini 
17269371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
17276947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17286947451fSStefano Zampini   PetscInt      lda;
17296947451fSStefano Zampini 
17306947451fSStefano Zampini   PetscFunctionBegin;
17315f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
17325f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
173348a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
17346947451fSStefano Zampini   a->vecinuse = col + 1;
17359566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
17369566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
17379566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
17386947451fSStefano Zampini   *v = a->cvec;
17396947451fSStefano Zampini   PetscFunctionReturn(0);
17406947451fSStefano Zampini }
17416947451fSStefano Zampini 
17429371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
17436947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17446947451fSStefano Zampini 
17456947451fSStefano Zampini   PetscFunctionBegin;
17465f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
17475f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
17486947451fSStefano Zampini   a->vecinuse = 0;
17499566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
17509566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1751742765d3SMatthew Knepley   if (v) *v = NULL;
17526947451fSStefano Zampini   PetscFunctionReturn(0);
17536947451fSStefano Zampini }
17546947451fSStefano Zampini 
17559371c9d4SSatish Balay PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
17565ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1757616b8fbbSStefano Zampini   Mat_MPIDense *c;
1758616b8fbbSStefano Zampini   MPI_Comm      comm;
1759a2748737SPierre Jolivet   PetscInt      pbegin, pend;
17605ea7661aSPierre Jolivet 
17615ea7661aSPierre Jolivet   PetscFunctionBegin;
17629566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
17635f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
17645f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1765a2748737SPierre Jolivet   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1766a2748737SPierre Jolivet   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
17675ea7661aSPierre Jolivet   if (!a->cmat) {
17689566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &a->cmat));
17699566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
17709566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1771a2748737SPierre Jolivet     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1772a2748737SPierre Jolivet     else {
1773a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1774a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1775a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1776a2748737SPierre Jolivet     }
17779566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
17789566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1779a2748737SPierre Jolivet   } else {
1780a2748737SPierre Jolivet     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1781a2748737SPierre Jolivet     if (same && a->cmat->rmap->N != A->rmap->N) {
1782a2748737SPierre Jolivet       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1783a2748737SPierre Jolivet       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1784a2748737SPierre Jolivet     }
1785a2748737SPierre Jolivet     if (!same) {
1786a2748737SPierre Jolivet       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1787a2748737SPierre Jolivet       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1788a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1789a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1790a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1791a2748737SPierre Jolivet     }
1792a2748737SPierre Jolivet     if (cend - cbegin != a->cmat->cmap->N) {
17939566063dSJacob Faibussowitsch       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
17949566063dSJacob Faibussowitsch       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
17959566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
17969566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
17975ea7661aSPierre Jolivet     }
1798a2748737SPierre Jolivet   }
1799616b8fbbSStefano Zampini   c = (Mat_MPIDense *)a->cmat->data;
18005f80ce2aSJacob Faibussowitsch   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1801a2748737SPierre Jolivet   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1802*3faff063SStefano Zampini 
1803616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1804616b8fbbSStefano Zampini   a->cmat->assembled    = PETSC_TRUE;
1805*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
1806*3faff063SStefano Zampini   a->cmat->offloadmask = c->A->offloadmask;
1807*3faff063SStefano Zampini #endif
18085ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
18095ea7661aSPierre Jolivet   *v          = a->cmat;
18105ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18115ea7661aSPierre Jolivet }
18125ea7661aSPierre Jolivet 
18139371c9d4SSatish Balay PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
18145ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1815616b8fbbSStefano Zampini   Mat_MPIDense *c;
18165ea7661aSPierre Jolivet 
18175ea7661aSPierre Jolivet   PetscFunctionBegin;
18185f80ce2aSJacob Faibussowitsch   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
18195f80ce2aSJacob Faibussowitsch   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
18205f80ce2aSJacob Faibussowitsch   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
18215ea7661aSPierre Jolivet   a->matinuse = 0;
1822616b8fbbSStefano Zampini   c           = (Mat_MPIDense *)a->cmat->data;
18239566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1824742765d3SMatthew Knepley   if (v) *v = NULL;
1825*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
1826*3faff063SStefano Zampini   A->offloadmask = a->A->offloadmask;
1827*3faff063SStefano Zampini #endif
18285ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18295ea7661aSPierre Jolivet }
18305ea7661aSPierre Jolivet 
1831002bc6daSRichard Tran Mills /*MC
1832002bc6daSRichard Tran Mills    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1833002bc6daSRichard Tran Mills 
1834002bc6daSRichard Tran Mills    Options Database Keys:
183511a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1836002bc6daSRichard Tran Mills 
1837002bc6daSRichard Tran Mills   Level: beginner
1838002bc6daSRichard Tran Mills 
183911a5261eSBarry Smith .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1840002bc6daSRichard Tran Mills M*/
18419371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
1842273d9f13SBarry Smith   Mat_MPIDense *a;
1843273d9f13SBarry Smith 
1844273d9f13SBarry Smith   PetscFunctionBegin;
18459566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(mat, &a));
1846b0a32e0cSBarry Smith   mat->data = (void *)a;
18479566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
1848273d9f13SBarry Smith 
1849273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1850273d9f13SBarry Smith 
1851273d9f13SBarry Smith   /* build cache for off array entries formed */
1852273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
18532205254eSKarl Rupp 
18549566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1855273d9f13SBarry Smith 
1856273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1857f4259b30SLisandro Dalcin   a->lvec        = NULL;
1858f4259b30SLisandro Dalcin   a->Mvctx       = NULL;
1859273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1860273d9f13SBarry Smith 
18619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
18629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
18639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
18649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
18659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
18669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
18679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
18689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
18699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
18709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
18719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
18729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
18739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
18749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
18759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
18769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
18779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
18789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
18799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
18809566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
18819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
18828baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
18839566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
18848baccfbdSHong Zhang #endif
1885d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
18869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1887d24d4204SJose E. Roman #endif
1888637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
18899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1890637a0070SStefano Zampini #endif
18919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
18929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
18939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
18946718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
18959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
18969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
18976718818eSStefano Zampini #endif
18988949adfdSHong Zhang 
18999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
19009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
19019566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1902273d9f13SBarry Smith   PetscFunctionReturn(0);
1903273d9f13SBarry Smith }
1904273d9f13SBarry Smith 
1905209238afSKris Buschelman /*MC
1906637a0070SStefano Zampini    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1907637a0070SStefano Zampini 
1908637a0070SStefano Zampini    Options Database Keys:
190911a5261eSBarry Smith . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1910637a0070SStefano Zampini 
1911637a0070SStefano Zampini   Level: beginner
1912637a0070SStefano Zampini 
191311a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1914637a0070SStefano Zampini M*/
1915637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1916a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h>
19179371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
1918637a0070SStefano Zampini   PetscFunctionBegin;
19199566063dSJacob Faibussowitsch   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
19209566063dSJacob Faibussowitsch   PetscCall(MatCreate_MPIDense(B));
19219566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
1922637a0070SStefano Zampini   PetscFunctionReturn(0);
1923637a0070SStefano Zampini }
1924637a0070SStefano Zampini #endif
1925637a0070SStefano Zampini 
1926637a0070SStefano Zampini /*MC
1927002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1928209238afSKris Buschelman 
192911a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
193011a5261eSBarry Smith    and `MATMPIDENSE` otherwise.
1931209238afSKris Buschelman 
1932209238afSKris Buschelman    Options Database Keys:
193311a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1934209238afSKris Buschelman 
1935209238afSKris Buschelman   Level: beginner
1936209238afSKris Buschelman 
1937c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
19386947451fSStefano Zampini M*/
19396947451fSStefano Zampini 
19406947451fSStefano Zampini /*MC
19416947451fSStefano Zampini    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
19426947451fSStefano Zampini 
194311a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
194411a5261eSBarry Smith    and `MATMPIDENSECUDA` otherwise.
19456947451fSStefano Zampini 
19466947451fSStefano Zampini    Options Database Keys:
194711a5261eSBarry Smith . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
19486947451fSStefano Zampini 
19496947451fSStefano Zampini   Level: beginner
19506947451fSStefano Zampini 
1951c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1952209238afSKris Buschelman M*/
1953209238afSKris Buschelman 
1954273d9f13SBarry Smith /*@C
1955273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1956273d9f13SBarry Smith 
19578f490ea3SStefano Zampini    Collective
1958273d9f13SBarry Smith 
1959273d9f13SBarry Smith    Input Parameters:
19601c4f3114SJed Brown .  B - the matrix
19610298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1962273d9f13SBarry Smith    to control all matrix memory allocation.
1963273d9f13SBarry Smith 
1964273d9f13SBarry Smith    Notes:
1965273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1966273d9f13SBarry Smith    storage by columns.
1967273d9f13SBarry Smith 
1968273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1969273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
19700298fd71SBarry Smith    set data=NULL.
1971273d9f13SBarry Smith 
1972273d9f13SBarry Smith    Level: intermediate
1973273d9f13SBarry Smith 
197411a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1975273d9f13SBarry Smith @*/
19769371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
1977273d9f13SBarry Smith   PetscFunctionBegin;
1978d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1979cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1980273d9f13SBarry Smith   PetscFunctionReturn(0);
1981273d9f13SBarry Smith }
1982273d9f13SBarry Smith 
1983d3042a70SBarry Smith /*@
198411a5261eSBarry Smith    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1985d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1986d3042a70SBarry Smith    into a matrix
1987d3042a70SBarry Smith 
1988d3042a70SBarry Smith    Not Collective
1989d3042a70SBarry Smith 
1990d3042a70SBarry Smith    Input Parameters:
1991d3042a70SBarry Smith +  mat - the matrix
1992d3042a70SBarry Smith -  array - the array in column major order
1993d3042a70SBarry Smith 
199411a5261eSBarry Smith    Note:
199511a5261eSBarry Smith    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1996d3042a70SBarry Smith    freed when the matrix is destroyed.
1997d3042a70SBarry Smith 
1998d3042a70SBarry Smith    Level: developer
1999d3042a70SBarry Smith 
200011a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
200111a5261eSBarry Smith           `MatDenseReplaceArray()`
2002d3042a70SBarry Smith @*/
20039371c9d4SSatish Balay PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
2004d3042a70SBarry Smith   PetscFunctionBegin;
2005d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2006cac4c232SBarry Smith   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
20079566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2008*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
2009637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2010637a0070SStefano Zampini #endif
2011d3042a70SBarry Smith   PetscFunctionReturn(0);
2012d3042a70SBarry Smith }
2013d3042a70SBarry Smith 
2014d3042a70SBarry Smith /*@
201511a5261eSBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
2016d3042a70SBarry Smith 
2017d3042a70SBarry Smith    Not Collective
2018d3042a70SBarry Smith 
2019d3042a70SBarry Smith    Input Parameters:
2020d3042a70SBarry Smith .  mat - the matrix
2021d3042a70SBarry Smith 
202211a5261eSBarry Smith    Note:
202311a5261eSBarry Smith    You can only call this after a call to `MatDensePlaceArray()`
2024d3042a70SBarry Smith 
2025d3042a70SBarry Smith    Level: developer
2026d3042a70SBarry Smith 
202711a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2028d3042a70SBarry Smith @*/
20299371c9d4SSatish Balay PetscErrorCode MatDenseResetArray(Mat mat) {
2030d3042a70SBarry Smith   PetscFunctionBegin;
2031d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2032cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
20339566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2034d3042a70SBarry Smith   PetscFunctionReturn(0);
2035d3042a70SBarry Smith }
2036d3042a70SBarry Smith 
2037d5ea218eSStefano Zampini /*@
2038d5ea218eSStefano Zampini    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2039d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2040d5ea218eSStefano Zampini    into a matrix
2041d5ea218eSStefano Zampini 
2042d5ea218eSStefano Zampini    Not Collective
2043d5ea218eSStefano Zampini 
2044d5ea218eSStefano Zampini    Input Parameters:
2045d5ea218eSStefano Zampini +  mat - the matrix
2046d5ea218eSStefano Zampini -  array - the array in column major order
2047d5ea218eSStefano Zampini 
204811a5261eSBarry Smith    Note:
204911a5261eSBarry Smith    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2050d5ea218eSStefano Zampini    freed by the user. It will be freed when the matrix is destroyed.
2051d5ea218eSStefano Zampini 
2052d5ea218eSStefano Zampini    Level: developer
2053d5ea218eSStefano Zampini 
205411a5261eSBarry Smith .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2055d5ea218eSStefano Zampini @*/
20569371c9d4SSatish Balay PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
2057d5ea218eSStefano Zampini   PetscFunctionBegin;
2058d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2059cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
20609566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2061*3faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
2062d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2063d5ea218eSStefano Zampini #endif
2064d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2065d5ea218eSStefano Zampini }
2066d5ea218eSStefano Zampini 
2067637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
20688965ea79SLois Curfman McInnes /*@C
206911a5261eSBarry Smith    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2070637a0070SStefano Zampini    array provided by the user. This is useful to avoid copying an array
2071637a0070SStefano Zampini    into a matrix
2072637a0070SStefano Zampini 
2073637a0070SStefano Zampini    Not Collective
2074637a0070SStefano Zampini 
2075637a0070SStefano Zampini    Input Parameters:
2076637a0070SStefano Zampini +  mat - the matrix
2077637a0070SStefano Zampini -  array - the array in column major order
2078637a0070SStefano Zampini 
207911a5261eSBarry Smith    Note:
208011a5261eSBarry Smith    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2081637a0070SStefano Zampini    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2082637a0070SStefano Zampini 
2083637a0070SStefano Zampini    Level: developer
2084637a0070SStefano Zampini 
208511a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2086637a0070SStefano Zampini @*/
20879371c9d4SSatish Balay PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
2088637a0070SStefano Zampini   PetscFunctionBegin;
2089d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2090cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
20919566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2092637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2093637a0070SStefano Zampini   PetscFunctionReturn(0);
2094637a0070SStefano Zampini }
2095637a0070SStefano Zampini 
2096637a0070SStefano Zampini /*@C
209711a5261eSBarry Smith    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`
2098637a0070SStefano Zampini 
2099637a0070SStefano Zampini    Not Collective
2100637a0070SStefano Zampini 
2101637a0070SStefano Zampini    Input Parameters:
2102637a0070SStefano Zampini .  mat - the matrix
2103637a0070SStefano Zampini 
210411a5261eSBarry Smith    Note:
210511a5261eSBarry Smith    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2106637a0070SStefano Zampini 
2107637a0070SStefano Zampini    Level: developer
2108637a0070SStefano Zampini 
210911a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2110637a0070SStefano Zampini @*/
21119371c9d4SSatish Balay PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
2112637a0070SStefano Zampini   PetscFunctionBegin;
2113d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2114cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
21159566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2116637a0070SStefano Zampini   PetscFunctionReturn(0);
2117637a0070SStefano Zampini }
2118637a0070SStefano Zampini 
2119637a0070SStefano Zampini /*@C
212011a5261eSBarry Smith    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2121d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2122d5ea218eSStefano Zampini    into a matrix
2123d5ea218eSStefano Zampini 
2124d5ea218eSStefano Zampini    Not Collective
2125d5ea218eSStefano Zampini 
2126d5ea218eSStefano Zampini    Input Parameters:
2127d5ea218eSStefano Zampini +  mat - the matrix
2128d5ea218eSStefano Zampini -  array - the array in column major order
2129d5ea218eSStefano Zampini 
213011a5261eSBarry Smith    Note:
2131d5ea218eSStefano Zampini    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2132d5ea218eSStefano Zampini    The memory passed in CANNOT be freed by the user. It will be freed
2133d5ea218eSStefano Zampini    when the matrix is destroyed. The array should respect the matrix leading dimension.
2134d5ea218eSStefano Zampini 
2135d5ea218eSStefano Zampini    Level: developer
2136d5ea218eSStefano Zampini 
2137db781477SPatrick Sanan .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2138d5ea218eSStefano Zampini @*/
21399371c9d4SSatish Balay PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
2140d5ea218eSStefano Zampini   PetscFunctionBegin;
2141d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2142cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
21439566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2144d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2145d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2146d5ea218eSStefano Zampini }
2147d5ea218eSStefano Zampini 
2148d5ea218eSStefano Zampini /*@C
214911a5261eSBarry Smith    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2150637a0070SStefano Zampini 
2151637a0070SStefano Zampini    Not Collective
2152637a0070SStefano Zampini 
2153637a0070SStefano Zampini    Input Parameters:
2154637a0070SStefano Zampini .  A - the matrix
2155637a0070SStefano Zampini 
2156637a0070SStefano Zampini    Output Parameters
2157637a0070SStefano Zampini .  array - the GPU array in column major order
2158637a0070SStefano Zampini 
2159637a0070SStefano Zampini    Notes:
216011a5261eSBarry Smith    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
216111a5261eSBarry Smith 
216211a5261eSBarry Smith    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2163637a0070SStefano Zampini 
2164637a0070SStefano Zampini    Level: developer
2165637a0070SStefano Zampini 
216611a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2167637a0070SStefano Zampini @*/
21689371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
2169637a0070SStefano Zampini   PetscFunctionBegin;
2170d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2171cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
21729566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2173637a0070SStefano Zampini   PetscFunctionReturn(0);
2174637a0070SStefano Zampini }
2175637a0070SStefano Zampini 
2176637a0070SStefano Zampini /*@C
217711a5261eSBarry Smith    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2178637a0070SStefano Zampini 
2179637a0070SStefano Zampini    Not Collective
2180637a0070SStefano Zampini 
2181637a0070SStefano Zampini    Input Parameters:
2182637a0070SStefano Zampini +  A - the matrix
2183637a0070SStefano Zampini -  array - the GPU array in column major order
2184637a0070SStefano Zampini 
2185637a0070SStefano Zampini    Level: developer
2186637a0070SStefano Zampini 
218711a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2188637a0070SStefano Zampini @*/
21899371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
2190637a0070SStefano Zampini   PetscFunctionBegin;
2191d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2192cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
21939566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2194637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2195637a0070SStefano Zampini   PetscFunctionReturn(0);
2196637a0070SStefano Zampini }
2197637a0070SStefano Zampini 
2198637a0070SStefano Zampini /*@C
219911a5261eSBarry Smith    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2200637a0070SStefano Zampini 
2201637a0070SStefano Zampini    Not Collective
2202637a0070SStefano Zampini 
2203637a0070SStefano Zampini    Input Parameters:
2204637a0070SStefano Zampini .  A - the matrix
2205637a0070SStefano Zampini 
2206637a0070SStefano Zampini    Output Parameters
2207637a0070SStefano Zampini .  array - the GPU array in column major order
2208637a0070SStefano Zampini 
220911a5261eSBarry Smith    Note:
221011a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2211637a0070SStefano Zampini 
2212637a0070SStefano Zampini    Level: developer
2213637a0070SStefano Zampini 
221411a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2215637a0070SStefano Zampini @*/
22169371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
2217637a0070SStefano Zampini   PetscFunctionBegin;
2218d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2219cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2220637a0070SStefano Zampini   PetscFunctionReturn(0);
2221637a0070SStefano Zampini }
2222637a0070SStefano Zampini 
2223637a0070SStefano Zampini /*@C
222411a5261eSBarry Smith    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2225637a0070SStefano Zampini 
2226637a0070SStefano Zampini    Not Collective
2227637a0070SStefano Zampini 
2228637a0070SStefano Zampini    Input Parameters:
2229637a0070SStefano Zampini +  A - the matrix
2230637a0070SStefano Zampini -  array - the GPU array in column major order
2231637a0070SStefano Zampini 
223211a5261eSBarry Smith    Note:
223311a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2234637a0070SStefano Zampini 
2235637a0070SStefano Zampini    Level: developer
2236637a0070SStefano Zampini 
223711a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2238637a0070SStefano Zampini @*/
22399371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2240637a0070SStefano Zampini   PetscFunctionBegin;
2241cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2242637a0070SStefano Zampini   PetscFunctionReturn(0);
2243637a0070SStefano Zampini }
2244637a0070SStefano Zampini 
2245637a0070SStefano Zampini /*@C
224611a5261eSBarry Smith    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2247637a0070SStefano Zampini 
2248637a0070SStefano Zampini    Not Collective
2249637a0070SStefano Zampini 
2250637a0070SStefano Zampini    Input Parameters:
2251637a0070SStefano Zampini .  A - the matrix
2252637a0070SStefano Zampini 
2253637a0070SStefano Zampini    Output Parameters
2254637a0070SStefano Zampini .  array - the GPU array in column major order
2255637a0070SStefano Zampini 
225611a5261eSBarry Smith    Note:
225711a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2258637a0070SStefano Zampini 
2259637a0070SStefano Zampini    Level: developer
2260637a0070SStefano Zampini 
226111a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2262637a0070SStefano Zampini @*/
22639371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
2264637a0070SStefano Zampini   PetscFunctionBegin;
2265d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2266cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
22679566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2268637a0070SStefano Zampini   PetscFunctionReturn(0);
2269637a0070SStefano Zampini }
2270637a0070SStefano Zampini 
2271637a0070SStefano Zampini /*@C
227211a5261eSBarry Smith    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2273637a0070SStefano Zampini 
2274637a0070SStefano Zampini    Not Collective
2275637a0070SStefano Zampini 
2276637a0070SStefano Zampini    Input Parameters:
2277637a0070SStefano Zampini +  A - the matrix
2278637a0070SStefano Zampini -  array - the GPU array in column major order
2279637a0070SStefano Zampini 
2280637a0070SStefano Zampini    Level: developer
2281637a0070SStefano Zampini 
228211a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2283637a0070SStefano Zampini @*/
22849371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
2285637a0070SStefano Zampini   PetscFunctionBegin;
2286d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2287cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
22889566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2289637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2290637a0070SStefano Zampini   PetscFunctionReturn(0);
2291637a0070SStefano Zampini }
2292637a0070SStefano Zampini #endif
2293637a0070SStefano Zampini 
2294637a0070SStefano Zampini /*@C
229511a5261eSBarry Smith    MatCreateDense - Creates a matrix in `MATDENSE` format.
22968965ea79SLois Curfman McInnes 
2297d083f849SBarry Smith    Collective
2298db81eaa0SLois Curfman McInnes 
22998965ea79SLois Curfman McInnes    Input Parameters:
2300db81eaa0SLois Curfman McInnes +  comm - MPI communicator
230111a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
230211a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
230311a5261eSBarry Smith .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
230411a5261eSBarry Smith .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
230511a5261eSBarry Smith -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2306dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
23078965ea79SLois Curfman McInnes 
23088965ea79SLois Curfman McInnes    Output Parameter:
2309477f1c0bSLois Curfman McInnes .  A - the matrix
23108965ea79SLois Curfman McInnes 
2311b259b22eSLois Curfman McInnes    Notes:
231239ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
231339ddd567SLois Curfman McInnes    storage by columns.
231411a5261eSBarry Smith 
231511a5261eSBarry Smith    Although local portions of the matrix are stored in column-major
2316002bc6daSRichard Tran Mills    order, the matrix is partitioned across MPI ranks by row.
23178965ea79SLois Curfman McInnes 
231818f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
231918f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
232011a5261eSBarry Smith    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
232118f449edSLois Curfman McInnes 
23228965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
23238965ea79SLois Curfman McInnes    (possibly both).
23248965ea79SLois Curfman McInnes 
2325027ccd11SLois Curfman McInnes    Level: intermediate
2326027ccd11SLois Curfman McInnes 
232711a5261eSBarry Smith .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
23288965ea79SLois Curfman McInnes @*/
23299371c9d4SSatish Balay PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
23303a40ed3dSBarry Smith   PetscFunctionBegin;
23319566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23329566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23336f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSE));
23349566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(*A, data));
23356f08ad05SPierre Jolivet   PetscCall(MatMPIDenseSetPreallocation(*A, data));
23363a40ed3dSBarry Smith   PetscFunctionReturn(0);
23378965ea79SLois Curfman McInnes }
23388965ea79SLois Curfman McInnes 
2339637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2340637a0070SStefano Zampini /*@C
234111a5261eSBarry Smith    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
2342637a0070SStefano Zampini 
2343637a0070SStefano Zampini    Collective
2344637a0070SStefano Zampini 
2345637a0070SStefano Zampini    Input Parameters:
2346637a0070SStefano Zampini +  comm - MPI communicator
234711a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
234811a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
234911a5261eSBarry Smith .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
235011a5261eSBarry Smith .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2351637a0070SStefano Zampini -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2352637a0070SStefano Zampini    to control matrix memory allocation.
2353637a0070SStefano Zampini 
2354637a0070SStefano Zampini    Output Parameter:
2355637a0070SStefano Zampini .  A - the matrix
2356637a0070SStefano Zampini 
2357637a0070SStefano Zampini    Level: intermediate
2358637a0070SStefano Zampini 
235911a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
2360637a0070SStefano Zampini @*/
23619371c9d4SSatish Balay PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
2362637a0070SStefano Zampini   PetscFunctionBegin;
23639566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
2364637a0070SStefano Zampini   PetscValidLogicalCollectiveBool(*A, !!data, 6);
23659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23666f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSECUDA));
23679566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
23686f08ad05SPierre Jolivet   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
2369637a0070SStefano Zampini   PetscFunctionReturn(0);
2370637a0070SStefano Zampini }
2371637a0070SStefano Zampini #endif
2372637a0070SStefano Zampini 
23739371c9d4SSatish Balay static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
23748965ea79SLois Curfman McInnes   Mat           mat;
23753501a2bdSLois Curfman McInnes   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
23768965ea79SLois Curfman McInnes 
23773a40ed3dSBarry Smith   PetscFunctionBegin;
2378f4259b30SLisandro Dalcin   *newmat = NULL;
23799566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
23809566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
23819566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
2382834f8fabSBarry Smith   a = (Mat_MPIDense *)mat->data;
23835aa7edbeSHong Zhang 
2384d5f3da31SBarry Smith   mat->factortype   = A->factortype;
2385c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
2386273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
23878965ea79SLois Curfman McInnes 
2388e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
23893782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
2390e04c1aa4SHong Zhang 
23919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
23929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
23938965ea79SLois Curfman McInnes 
23949566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
23959566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
239601b82886SBarry Smith 
23978965ea79SLois Curfman McInnes   *newmat = mat;
23983a40ed3dSBarry Smith   PetscFunctionReturn(0);
23998965ea79SLois Curfman McInnes }
24008965ea79SLois Curfman McInnes 
24019371c9d4SSatish Balay PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
240287d5ce66SSatish Balay   PetscBool isbinary;
240387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
240487d5ce66SSatish Balay   PetscBool ishdf5;
240587d5ce66SSatish Balay #endif
2406eb91f321SVaclav Hapla 
2407eb91f321SVaclav Hapla   PetscFunctionBegin;
2408eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2409eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2410eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
24119566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
24129566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
241387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
24149566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
241587d5ce66SSatish Balay #endif
2416eb91f321SVaclav Hapla   if (isbinary) {
24179566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2418eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
241987d5ce66SSatish Balay   } else if (ishdf5) {
24209566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2421eb91f321SVaclav Hapla #endif
242298921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2423eb91f321SVaclav Hapla   PetscFunctionReturn(0);
2424eb91f321SVaclav Hapla }
2425eb91f321SVaclav Hapla 
24269371c9d4SSatish Balay static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
24276e4ee0c6SHong Zhang   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
24286e4ee0c6SHong Zhang   Mat           a, b;
242990ace30eSBarry Smith 
24306e4ee0c6SHong Zhang   PetscFunctionBegin;
24316e4ee0c6SHong Zhang   a = matA->A;
24326e4ee0c6SHong Zhang   b = matB->A;
24332cf15c64SPierre Jolivet   PetscCall(MatEqual(a, b, flag));
24342cf15c64SPierre Jolivet   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
24356e4ee0c6SHong Zhang   PetscFunctionReturn(0);
24366e4ee0c6SHong Zhang }
243790ace30eSBarry Smith 
24389371c9d4SSatish Balay PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
24396718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2440baa3c1c6SHong Zhang 
2441baa3c1c6SHong Zhang   PetscFunctionBegin;
24429566063dSJacob Faibussowitsch   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
24439566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&atb->atb));
24449566063dSJacob Faibussowitsch   PetscCall(PetscFree(atb));
2445baa3c1c6SHong Zhang   PetscFunctionReturn(0);
2446baa3c1c6SHong Zhang }
2447baa3c1c6SHong Zhang 
24489371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
24496718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2450cc48ffa7SToby Isaac 
2451cc48ffa7SToby Isaac   PetscFunctionBegin;
24529566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
24539566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
24549566063dSJacob Faibussowitsch   PetscCall(PetscFree(abt));
2455cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2456cc48ffa7SToby Isaac }
2457cc48ffa7SToby Isaac 
24589371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2459baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
24606718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2461cb20be35SHong Zhang   MPI_Comm               comm;
24626718818eSStefano Zampini   PetscMPIInt            size, *recvcounts;
24636718818eSStefano Zampini   PetscScalar           *carray, *sendbuf;
2464637a0070SStefano Zampini   const PetscScalar     *atbarray;
2465a1f9a5e6SStefano Zampini   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2466e68c0b26SHong Zhang   const PetscInt        *ranges;
2467cb20be35SHong Zhang 
2468cb20be35SHong Zhang   PetscFunctionBegin;
24696718818eSStefano Zampini   MatCheckProduct(C, 3);
24705f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
24716718818eSStefano Zampini   atb        = (Mat_TransMatMultDense *)C->product->data;
24726718818eSStefano Zampini   recvcounts = atb->recvcounts;
24736718818eSStefano Zampini   sendbuf    = atb->sendbuf;
24746718818eSStefano Zampini 
24759566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
24769566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2477e68c0b26SHong Zhang 
2478c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
24799566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
2480cb20be35SHong Zhang 
24819566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
2482cb20be35SHong Zhang 
2483660d5466SHong Zhang   /* arrange atbarray into sendbuf */
24849566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
24859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2486637a0070SStefano Zampini   for (proc = 0, k = 0; proc < size; proc++) {
2487baa3c1c6SHong Zhang     for (j = 0; j < cN; j++) {
2488a1f9a5e6SStefano Zampini       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2489cb20be35SHong Zhang     }
2490cb20be35SHong Zhang   }
24919566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2492637a0070SStefano Zampini 
2493c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
24949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
24959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
24969566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
24979566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
24989566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2499cb20be35SHong Zhang   PetscFunctionReturn(0);
2500cb20be35SHong Zhang }
2501cb20be35SHong Zhang 
25029371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2503cb20be35SHong Zhang   MPI_Comm               comm;
2504baa3c1c6SHong Zhang   PetscMPIInt            size;
2505660d5466SHong Zhang   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2506baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
25076718818eSStefano Zampini   PetscBool              cisdense;
25080acaeb71SStefano Zampini   PetscInt               i;
25090acaeb71SStefano Zampini   const PetscInt        *ranges;
2510cb20be35SHong Zhang 
2511cb20be35SHong Zhang   PetscFunctionBegin;
25128f83a4d9SJacob Faibussowitsch   MatCheckProduct(C, 4);
25135f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
25149566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2515cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
251698921bdaSJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2517cb20be35SHong Zhang   }
2518cb20be35SHong Zhang 
25194222ddf1SHong Zhang   /* create matrix product C */
25209566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
25219566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
252248a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
25239566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2524baa3c1c6SHong Zhang 
25254222ddf1SHong Zhang   /* create data structure for reuse C */
25269566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
25279566063dSJacob Faibussowitsch   PetscCall(PetscNew(&atb));
25284222ddf1SHong Zhang   cM = C->rmap->N;
25299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
25309566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
25310acaeb71SStefano Zampini   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2532baa3c1c6SHong Zhang 
25336718818eSStefano Zampini   C->product->data    = atb;
25346718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2535cb20be35SHong Zhang   PetscFunctionReturn(0);
2536cb20be35SHong Zhang }
2537cb20be35SHong Zhang 
25389371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2539cc48ffa7SToby Isaac   MPI_Comm               comm;
2540cc48ffa7SToby Isaac   PetscMPIInt            i, size;
2541cc48ffa7SToby Isaac   PetscInt               maxRows, bufsiz;
2542cc48ffa7SToby Isaac   PetscMPIInt            tag;
25434222ddf1SHong Zhang   PetscInt               alg;
2544cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
25454222ddf1SHong Zhang   Mat_Product           *product = C->product;
25464222ddf1SHong Zhang   PetscBool              flg;
2547cc48ffa7SToby Isaac 
2548cc48ffa7SToby Isaac   PetscFunctionBegin;
25496718818eSStefano Zampini   MatCheckProduct(C, 4);
25505f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
25514222ddf1SHong Zhang   /* check local size of A and B */
25525f80ce2aSJacob Faibussowitsch   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2553cc48ffa7SToby Isaac 
25549566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2555637a0070SStefano Zampini   alg = flg ? 0 : 1;
2556cc48ffa7SToby Isaac 
25574222ddf1SHong Zhang   /* setup matrix product C */
25589566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
25599566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATMPIDENSE));
25609566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
25619566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
25624222ddf1SHong Zhang 
25634222ddf1SHong Zhang   /* create data structure for reuse C */
25649566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
25659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
25669566063dSJacob Faibussowitsch   PetscCall(PetscNew(&abt));
2567cc48ffa7SToby Isaac   abt->tag = tag;
2568faa55883SToby Isaac   abt->alg = alg;
2569faa55883SToby Isaac   switch (alg) {
25704222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2571cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2572cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
25739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2574faa55883SToby Isaac     break;
25754222ddf1SHong Zhang   default: /* alg: "allgatherv" */
25769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
25779566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2578faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2579faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2580faa55883SToby Isaac     break;
2581faa55883SToby Isaac   }
2582cc48ffa7SToby Isaac 
25836718818eSStefano Zampini   C->product->data                = abt;
25846718818eSStefano Zampini   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
25854222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2586cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2587cc48ffa7SToby Isaac }
2588cc48ffa7SToby Isaac 
25899371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
2590cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
25916718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2592cc48ffa7SToby Isaac   MPI_Comm               comm;
2593cc48ffa7SToby Isaac   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2594f4259b30SLisandro Dalcin   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2595cc48ffa7SToby Isaac   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2596cc48ffa7SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2597637a0070SStefano Zampini   const PetscScalar     *av, *bv;
259823fff9afSBarry Smith   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2599cc48ffa7SToby Isaac   MPI_Request            reqs[2];
2600cc48ffa7SToby Isaac   const PetscInt        *ranges;
2601cc48ffa7SToby Isaac 
2602cc48ffa7SToby Isaac   PetscFunctionBegin;
26036718818eSStefano Zampini   MatCheckProduct(C, 3);
26045f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
26056718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
26069566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
26079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
26089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
26099566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
26109566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
26119566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
26129566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
26139566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
26149566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &i));
26159566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &blda));
26169566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
26179566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
26189566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(B, &ranges));
2619cc48ffa7SToby Isaac   bn = B->rmap->n;
2620637a0070SStefano Zampini   if (blda == bn) {
2621637a0070SStefano Zampini     sendbuf = (PetscScalar *)bv;
2622cc48ffa7SToby Isaac   } else {
2623cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2624cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2625ad540459SPierre Jolivet       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2626cc48ffa7SToby Isaac     }
2627cc48ffa7SToby Isaac   }
2628cc48ffa7SToby Isaac   if (size > 1) {
2629cc48ffa7SToby Isaac     sendto   = (rank + size - 1) % size;
2630cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2631cc48ffa7SToby Isaac   } else {
2632cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2633cc48ffa7SToby Isaac   }
26349566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
26359566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2636cc48ffa7SToby Isaac   recvisfrom = rank;
2637cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2638cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2639cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2640cc48ffa7SToby Isaac     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2641cc48ffa7SToby Isaac 
2642cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2643cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2644cc48ffa7SToby Isaac       sendsiz = cK * bn;
2645cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2646cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
26479566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
26489566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2649cc48ffa7SToby Isaac     }
2650cc48ffa7SToby Isaac 
2651cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
26529566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2653792fecdfSBarry Smith     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2654cc48ffa7SToby Isaac 
2655cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2656cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
26579566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2658cc48ffa7SToby Isaac     }
2659cc48ffa7SToby Isaac     bn         = nextbn;
2660cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2661cc48ffa7SToby Isaac     sendbuf    = recvbuf;
2662cc48ffa7SToby Isaac   }
26639566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
26649566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
26659566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
26669566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
26679566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2668cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2669cc48ffa7SToby Isaac }
2670cc48ffa7SToby Isaac 
26719371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2672faa55883SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
26736718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2674faa55883SToby Isaac   MPI_Comm               comm;
2675637a0070SStefano Zampini   PetscMPIInt            size;
2676637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2677637a0070SStefano Zampini   const PetscScalar     *av, *bv;
2678637a0070SStefano Zampini   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2679faa55883SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2680637a0070SStefano Zampini   PetscBLASInt           cm, cn, ck, alda, clda;
2681faa55883SToby Isaac 
2682faa55883SToby Isaac   PetscFunctionBegin;
26836718818eSStefano Zampini   MatCheckProduct(C, 3);
26845f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
26856718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
26869566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
26879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
26889566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
26899566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
26909566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
26919566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
26929566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
26939566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &blda));
26949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
26959566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
2696faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2697faa55883SToby Isaac   bn      = B->rmap->n;
2698faa55883SToby Isaac   sendbuf = abt->buf[0];
2699faa55883SToby Isaac   recvbuf = abt->buf[1];
2700faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2701ad540459SPierre Jolivet     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2702faa55883SToby Isaac   }
27039566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
27049566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
27059566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
27069566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
27079566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2708792fecdfSBarry Smith   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
27099566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
27109566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
27119566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
27129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
27139566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2714faa55883SToby Isaac   PetscFunctionReturn(0);
2715faa55883SToby Isaac }
2716faa55883SToby Isaac 
27179371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
27186718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2719faa55883SToby Isaac 
2720faa55883SToby Isaac   PetscFunctionBegin;
27216718818eSStefano Zampini   MatCheckProduct(C, 3);
27225f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
27236718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
2724faa55883SToby Isaac   switch (abt->alg) {
27259371c9d4SSatish Balay   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
27269371c9d4SSatish Balay   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2727faa55883SToby Isaac   }
2728faa55883SToby Isaac   PetscFunctionReturn(0);
2729faa55883SToby Isaac }
2730faa55883SToby Isaac 
27319371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
27326718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2733320f2790SHong Zhang 
2734320f2790SHong Zhang   PetscFunctionBegin;
27359566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ce));
27369566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ae));
27379566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Be));
27389566063dSJacob Faibussowitsch   PetscCall(PetscFree(ab));
2739320f2790SHong Zhang   PetscFunctionReturn(0);
2740320f2790SHong Zhang }
2741320f2790SHong Zhang 
27425242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
27439371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
27446718818eSStefano Zampini   Mat_MatMultDense *ab;
2745320f2790SHong Zhang 
2746320f2790SHong Zhang   PetscFunctionBegin;
27476718818eSStefano Zampini   MatCheckProduct(C, 3);
27485f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
27496718818eSStefano Zampini   ab = (Mat_MatMultDense *)C->product->data;
27509566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
27519566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
27529566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
27539566063dSJacob Faibussowitsch   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2754320f2790SHong Zhang   PetscFunctionReturn(0);
2755320f2790SHong Zhang }
2756320f2790SHong Zhang 
27579371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2758320f2790SHong Zhang   Mat               Ae, Be, Ce;
2759320f2790SHong Zhang   Mat_MatMultDense *ab;
2760320f2790SHong Zhang 
2761320f2790SHong Zhang   PetscFunctionBegin;
27626718818eSStefano Zampini   MatCheckProduct(C, 4);
27635f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
27644222ddf1SHong Zhang   /* check local size of A and B */
2765320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
276698921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2767320f2790SHong Zhang   }
2768320f2790SHong Zhang 
27694222ddf1SHong Zhang   /* create elemental matrices Ae and Be */
27709566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
27719566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
27729566063dSJacob Faibussowitsch   PetscCall(MatSetType(Ae, MATELEMENTAL));
27739566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Ae));
27749566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2775320f2790SHong Zhang 
27769566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
27779566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
27789566063dSJacob Faibussowitsch   PetscCall(MatSetType(Be, MATELEMENTAL));
27799566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Be));
27809566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2781320f2790SHong Zhang 
27824222ddf1SHong Zhang   /* compute symbolic Ce = Ae*Be */
27839566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
27849566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
27854222ddf1SHong Zhang 
27864222ddf1SHong Zhang   /* setup C */
27879566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
27889566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATDENSE));
27899566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2790320f2790SHong Zhang 
2791320f2790SHong Zhang   /* create data structure for reuse Cdense */
27929566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ab));
2793320f2790SHong Zhang   ab->Ae = Ae;
2794320f2790SHong Zhang   ab->Be = Be;
2795320f2790SHong Zhang   ab->Ce = Ce;
27966718818eSStefano Zampini 
27976718818eSStefano Zampini   C->product->data       = ab;
27986718818eSStefano Zampini   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
27994222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2800320f2790SHong Zhang   PetscFunctionReturn(0);
2801320f2790SHong Zhang }
28024222ddf1SHong Zhang #endif
28034222ddf1SHong Zhang /* ----------------------------------------------- */
28044222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
28059371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2806320f2790SHong Zhang   PetscFunctionBegin;
28074222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
28084222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
2809320f2790SHong Zhang   PetscFunctionReturn(0);
2810320f2790SHong Zhang }
28115242a7b1SHong Zhang #endif
281286aefd0dSHong Zhang 
28139371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
28144222ddf1SHong Zhang   Mat_Product *product = C->product;
28154222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
28164222ddf1SHong Zhang 
28174222ddf1SHong Zhang   PetscFunctionBegin;
28184222ddf1SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
281998921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
28206718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
28216718818eSStefano Zampini   C->ops->productsymbolic          = MatProductSymbolic_AtB;
28224222ddf1SHong Zhang   PetscFunctionReturn(0);
28234222ddf1SHong Zhang }
28244222ddf1SHong Zhang 
28259371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
28264222ddf1SHong Zhang   Mat_Product *product     = C->product;
28274222ddf1SHong Zhang   const char  *algTypes[2] = {"allgatherv", "cyclic"};
28284222ddf1SHong Zhang   PetscInt     alg, nalg = 2;
28294222ddf1SHong Zhang   PetscBool    flg = PETSC_FALSE;
28304222ddf1SHong Zhang 
28314222ddf1SHong Zhang   PetscFunctionBegin;
28324222ddf1SHong Zhang   /* Set default algorithm */
28334222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
28349566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "default", &flg));
283548a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
28364222ddf1SHong Zhang 
28374222ddf1SHong Zhang   /* Get runtime option */
28384222ddf1SHong Zhang   if (product->api_user) {
2839d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
28409566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2841d0609cedSBarry Smith     PetscOptionsEnd();
28424222ddf1SHong Zhang   } else {
2843d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
28449566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2845d0609cedSBarry Smith     PetscOptionsEnd();
28464222ddf1SHong Zhang   }
284748a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
28484222ddf1SHong Zhang 
28494222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
28504222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
28514222ddf1SHong Zhang   PetscFunctionReturn(0);
28524222ddf1SHong Zhang }
28534222ddf1SHong Zhang 
28549371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
28554222ddf1SHong Zhang   Mat_Product *product = C->product;
28564222ddf1SHong Zhang 
28574222ddf1SHong Zhang   PetscFunctionBegin;
28584222ddf1SHong Zhang   switch (product->type) {
28594222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
28609371c9d4SSatish Balay   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
28614222ddf1SHong Zhang #endif
28629371c9d4SSatish Balay   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
28639371c9d4SSatish Balay   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
28649371c9d4SSatish Balay   default: break;
28654222ddf1SHong Zhang   }
28664222ddf1SHong Zhang   PetscFunctionReturn(0);
28674222ddf1SHong Zhang }
2868