xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
8baa3c1c6SHong Zhang #include <petscblaslapack.h>
98965ea79SLois Curfman McInnes 
10ab92ecdeSBarry Smith /*@
1111a5261eSBarry Smith       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
13ab92ecdeSBarry Smith 
14ab92ecdeSBarry Smith     Input Parameter:
1511a5261eSBarry Smith .      A - the sequential or MPI dense matrix
16ab92ecdeSBarry Smith 
17ab92ecdeSBarry Smith     Output Parameter:
18ab92ecdeSBarry Smith .      B - the inner matrix
19ab92ecdeSBarry Smith 
208e6c10adSSatish Balay     Level: intermediate
218e6c10adSSatish Balay 
2211a5261eSBarry Smith .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23ab92ecdeSBarry Smith @*/
249371c9d4SSatish Balay PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25ab92ecdeSBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26ace3abfcSBarry Smith   PetscBool     flg;
27ab92ecdeSBarry Smith 
28ab92ecdeSBarry Smith   PetscFunctionBegin;
29d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30d5ea218eSStefano Zampini   PetscValidPointer(B, 2);
319566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
322205254eSKarl Rupp   if (flg) *B = mat->A;
33f140bc17SStefano Zampini   else {
349566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
3528b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36f140bc17SStefano Zampini     *B = A;
37f140bc17SStefano Zampini   }
38ab92ecdeSBarry Smith   PetscFunctionReturn(0);
39ab92ecdeSBarry Smith }
40ab92ecdeSBarry Smith 
419371c9d4SSatish Balay PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42a76f77c3SStefano Zampini   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43a76f77c3SStefano Zampini   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44a76f77c3SStefano Zampini 
45a76f77c3SStefano Zampini   PetscFunctionBegin;
469566063dSJacob Faibussowitsch   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47a76f77c3SStefano Zampini   PetscFunctionReturn(0);
48a76f77c3SStefano Zampini }
49a76f77c3SStefano Zampini 
509371c9d4SSatish Balay PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
512f605a99SJose E. Roman   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
522f605a99SJose E. Roman   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
532f605a99SJose E. Roman   PetscScalar  *v;
542f605a99SJose E. Roman 
552f605a99SJose E. Roman   PetscFunctionBegin;
562f605a99SJose E. Roman   PetscCall(MatDenseGetArray(mat->A, &v));
572f605a99SJose E. Roman   PetscCall(MatDenseGetLDA(mat->A, &lda));
582f605a99SJose E. Roman   rend2 = PetscMin(rend, A->cmap->N);
592f605a99SJose E. Roman   if (rend2 > rstart) {
602f605a99SJose E. Roman     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
612f605a99SJose E. Roman     PetscCall(PetscLogFlops(rend2 - rstart));
622f605a99SJose E. Roman   }
632f605a99SJose E. Roman   PetscCall(MatDenseRestoreArray(mat->A, &v));
642f605a99SJose E. Roman   PetscFunctionReturn(0);
652f605a99SJose E. Roman }
662f605a99SJose E. Roman 
679371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68ba8c8a56SBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69d0f46423SBarry Smith   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70ba8c8a56SBarry Smith 
71ba8c8a56SBarry Smith   PetscFunctionBegin;
72aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73ba8c8a56SBarry Smith   lrow = row - rstart;
749566063dSJacob Faibussowitsch   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75ba8c8a56SBarry Smith   PetscFunctionReturn(0);
76ba8c8a56SBarry Smith }
77ba8c8a56SBarry Smith 
789371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79637a0070SStefano Zampini   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80637a0070SStefano Zampini   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81ba8c8a56SBarry Smith 
82ba8c8a56SBarry Smith   PetscFunctionBegin;
83aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84637a0070SStefano Zampini   lrow = row - rstart;
859566063dSJacob Faibussowitsch   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86ba8c8a56SBarry Smith   PetscFunctionReturn(0);
87ba8c8a56SBarry Smith }
88ba8c8a56SBarry Smith 
899371c9d4SSatish Balay PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
900de54da6SSatish Balay   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
91d0f46423SBarry Smith   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
9287828ca2SBarry Smith   PetscScalar  *array;
930de54da6SSatish Balay   MPI_Comm      comm;
944b6ae80fSStefano Zampini   PetscBool     flg;
9511bd1e4dSLisandro Dalcin   Mat           B;
960de54da6SSatish Balay 
970de54da6SSatish Balay   PetscFunctionBegin;
989566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &flg));
9928b400f6SJacob Faibussowitsch   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
1009566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
101616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
1024b6ae80fSStefano Zampini 
1039566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
10428b400f6SJacob Faibussowitsch     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
1059566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
1069566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &B));
1079566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, m, m, m, m));
1089566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
1099566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
1109566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
1119566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
1129566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
11311bd1e4dSLisandro Dalcin     *a = B;
1149566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
1152205254eSKarl Rupp   } else *a = B;
1160de54da6SSatish Balay   PetscFunctionReturn(0);
1170de54da6SSatish Balay }
1180de54da6SSatish Balay 
1199371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
12039b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
121d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
122ace3abfcSBarry Smith   PetscBool     roworiented = A->roworiented;
1238965ea79SLois Curfman McInnes 
1243a40ed3dSBarry Smith   PetscFunctionBegin;
1258965ea79SLois Curfman McInnes   for (i = 0; i < m; i++) {
1265ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
12708401ef6SPierre Jolivet     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
1288965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1298965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
13039b7565bSBarry Smith       if (roworiented) {
1319566063dSJacob Faibussowitsch         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
1323a40ed3dSBarry Smith       } else {
1338965ea79SLois Curfman McInnes         for (j = 0; j < n; j++) {
1345ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
13508401ef6SPierre Jolivet           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1369566063dSJacob Faibussowitsch           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
13739b7565bSBarry Smith         }
1388965ea79SLois Curfman McInnes       }
1392205254eSKarl Rupp     } else if (!A->donotstash) {
1405080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
14139b7565bSBarry Smith       if (roworiented) {
1429566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
143d36fbae8SSatish Balay       } else {
1449566063dSJacob Faibussowitsch         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
14539b7565bSBarry Smith       }
146b49de8d1SLois Curfman McInnes     }
147b49de8d1SLois Curfman McInnes   }
1483a40ed3dSBarry Smith   PetscFunctionReturn(0);
149b49de8d1SLois Curfman McInnes }
150b49de8d1SLois Curfman McInnes 
1519371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154b49de8d1SLois Curfman McInnes 
1553a40ed3dSBarry Smith   PetscFunctionBegin;
156b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
15754c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
15854c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
160b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
161b49de8d1SLois Curfman McInnes       for (j = 0; j < n; j++) {
16254c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
16354c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1649566063dSJacob Faibussowitsch         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165b49de8d1SLois Curfman McInnes       }
166e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
1678965ea79SLois Curfman McInnes   }
1683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1698965ea79SLois Curfman McInnes }
1708965ea79SLois Curfman McInnes 
1719371c9d4SSatish Balay static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
17249a6ff4bSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17349a6ff4bSBarry Smith 
17449a6ff4bSBarry Smith   PetscFunctionBegin;
1759566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, lda));
17649a6ff4bSBarry Smith   PetscFunctionReturn(0);
17749a6ff4bSBarry Smith }
17849a6ff4bSBarry Smith 
1799371c9d4SSatish Balay static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
180ad16ce7aSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
18117359960SJose E. Roman   PetscBool     iscuda;
182ad16ce7aSStefano Zampini 
183ad16ce7aSStefano Zampini   PetscFunctionBegin;
18417359960SJose E. Roman   if (!a->A) {
18528b400f6SJacob Faibussowitsch     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1869566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->rmap));
1879566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->cmap));
1889566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1899566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1909566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1919566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
19217359960SJose E. Roman   }
1939566063dSJacob Faibussowitsch   PetscCall(MatDenseSetLDA(a->A, lda));
194ad16ce7aSStefano Zampini   PetscFunctionReturn(0);
195ad16ce7aSStefano Zampini }
196ad16ce7aSStefano Zampini 
1979371c9d4SSatish Balay static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
198ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
199ff14e315SSatish Balay 
2003a40ed3dSBarry Smith   PetscFunctionBegin;
20128b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2029566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, array));
2033a40ed3dSBarry Smith   PetscFunctionReturn(0);
204ff14e315SSatish Balay }
205ff14e315SSatish Balay 
2069371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
2078572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2088572280aSBarry Smith 
2098572280aSBarry Smith   PetscFunctionBegin;
21028b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2119566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, array));
2128572280aSBarry Smith   PetscFunctionReturn(0);
2138572280aSBarry Smith }
2148572280aSBarry Smith 
2159371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
2166947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2176947451fSStefano Zampini 
2186947451fSStefano Zampini   PetscFunctionBegin;
21928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2209566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, array));
2216947451fSStefano Zampini   PetscFunctionReturn(0);
2226947451fSStefano Zampini }
2236947451fSStefano Zampini 
2249371c9d4SSatish Balay static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
225d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226d3042a70SBarry Smith 
227d3042a70SBarry Smith   PetscFunctionBegin;
22828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
22928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2309566063dSJacob Faibussowitsch   PetscCall(MatDensePlaceArray(a->A, array));
231d3042a70SBarry Smith   PetscFunctionReturn(0);
232d3042a70SBarry Smith }
233d3042a70SBarry Smith 
2349371c9d4SSatish Balay static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
235d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236d3042a70SBarry Smith 
237d3042a70SBarry Smith   PetscFunctionBegin;
23828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
23928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2409566063dSJacob Faibussowitsch   PetscCall(MatDenseResetArray(a->A));
241d3042a70SBarry Smith   PetscFunctionReturn(0);
242d3042a70SBarry Smith }
243d3042a70SBarry Smith 
2449371c9d4SSatish Balay static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
245d5ea218eSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246d5ea218eSStefano Zampini 
247d5ea218eSStefano Zampini   PetscFunctionBegin;
24828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
24928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2509566063dSJacob Faibussowitsch   PetscCall(MatDenseReplaceArray(a->A, array));
251d5ea218eSStefano Zampini   PetscFunctionReturn(0);
252d5ea218eSStefano Zampini }
253d5ea218eSStefano Zampini 
2549371c9d4SSatish Balay static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
255ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
256637a0070SStefano Zampini   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
2575d0c19d7SBarry Smith   const PetscInt    *irow, *icol;
258637a0070SStefano Zampini   const PetscScalar *v;
259637a0070SStefano Zampini   PetscScalar       *bv;
260ca3fa75bSLois Curfman McInnes   Mat                newmat;
2614aa3045dSJed Brown   IS                 iscol_local;
26242a884f0SBarry Smith   MPI_Comm           comm_is, comm_mat;
263ca3fa75bSLois Curfman McInnes 
264ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
2659566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
2669566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
26708401ef6SPierre Jolivet   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
26842a884f0SBarry Smith 
2699566063dSJacob Faibussowitsch   PetscCall(ISAllGather(iscol, &iscol_local));
2709566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &irow));
2719566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &icol));
2729566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &nrows));
2739566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
2749566063dSJacob Faibussowitsch   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
275ca3fa75bSLois Curfman McInnes 
276ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2777eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2787eba5e9cSLois Curfman McInnes      original matrix! */
279ca3fa75bSLois Curfman McInnes 
2809566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
2819566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
282ca3fa75bSLois Curfman McInnes 
283ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
284ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
285e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2867eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
287ca3fa75bSLois Curfman McInnes     newmat = *B;
288ca3fa75bSLois Curfman McInnes   } else {
289ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
2909566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
2919566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
2929566063dSJacob Faibussowitsch     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
2939566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
294ca3fa75bSLois Curfman McInnes   }
295ca3fa75bSLois Curfman McInnes 
296ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
297ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense *)newmat->data;
2989566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(newmatd->A, &bv));
2999566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mat->A, &v));
3009566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mat->A, &lda));
3014aa3045dSJed Brown   for (i = 0; i < Ncols; i++) {
302637a0070SStefano Zampini     const PetscScalar *av = v + lda * icol[i];
303ad540459SPierre Jolivet     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
304ca3fa75bSLois Curfman McInnes   }
3059566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
3069566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
307ca3fa75bSLois Curfman McInnes 
308ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
3099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
3109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
311ca3fa75bSLois Curfman McInnes 
312ca3fa75bSLois Curfman McInnes   /* Free work space */
3139566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &irow));
3149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &icol));
3159566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&iscol_local));
316ca3fa75bSLois Curfman McInnes   *B = newmat;
317ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
318ca3fa75bSLois Curfman McInnes }
319ca3fa75bSLois Curfman McInnes 
3209371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
32173a71a0fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
32273a71a0fSBarry Smith 
3233a40ed3dSBarry Smith   PetscFunctionBegin;
3249566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, array));
3253a40ed3dSBarry Smith   PetscFunctionReturn(0);
326ff14e315SSatish Balay }
327ff14e315SSatish Balay 
3289371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
3298572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3308572280aSBarry Smith 
3318572280aSBarry Smith   PetscFunctionBegin;
3329566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, array));
3338572280aSBarry Smith   PetscFunctionReturn(0);
3348572280aSBarry Smith }
3358572280aSBarry Smith 
3369371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
3376947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3386947451fSStefano Zampini 
3396947451fSStefano Zampini   PetscFunctionBegin;
3409566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
3416947451fSStefano Zampini   PetscFunctionReturn(0);
3426947451fSStefano Zampini }
3436947451fSStefano Zampini 
3449371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
34539ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
34613f74950SBarry Smith   PetscInt      nstash, reallocs;
3478965ea79SLois Curfman McInnes 
3483a40ed3dSBarry Smith   PetscFunctionBegin;
3494067683fSStefano Zampini   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
3508965ea79SLois Curfman McInnes 
3519566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
3529566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
3539566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
3543a40ed3dSBarry Smith   PetscFunctionReturn(0);
3558965ea79SLois Curfman McInnes }
3568965ea79SLois Curfman McInnes 
3579371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
35839ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
35913f74950SBarry Smith   PetscInt      i, *row, *col, flg, j, rstart, ncols;
36013f74950SBarry Smith   PetscMPIInt   n;
36187828ca2SBarry Smith   PetscScalar  *val;
3628965ea79SLois Curfman McInnes 
3633a40ed3dSBarry Smith   PetscFunctionBegin;
364910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
3658965ea79SLois Curfman McInnes     /*  wait on receives */
3667ef1d9bdSSatish Balay     while (1) {
3679566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
3687ef1d9bdSSatish Balay       if (!flg) break;
3698965ea79SLois Curfman McInnes 
3707ef1d9bdSSatish Balay       for (i = 0; i < n;) {
3717ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
3722205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
3732205254eSKarl Rupp           if (row[j] != rstart) break;
3742205254eSKarl Rupp         }
3757ef1d9bdSSatish Balay         if (j < n) ncols = j - i;
3767ef1d9bdSSatish Balay         else ncols = n - i;
3777ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
3789566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
3797ef1d9bdSSatish Balay         i = j;
3808965ea79SLois Curfman McInnes       }
3817ef1d9bdSSatish Balay     }
3829566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
383910cf402Sprj-   }
3848965ea79SLois Curfman McInnes 
3859566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mdn->A, mode));
3869566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mdn->A, mode));
3873a40ed3dSBarry Smith   PetscFunctionReturn(0);
3888965ea79SLois Curfman McInnes }
3898965ea79SLois Curfman McInnes 
3909371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
39139ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
3923a40ed3dSBarry Smith 
3933a40ed3dSBarry Smith   PetscFunctionBegin;
3949566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
3953a40ed3dSBarry Smith   PetscFunctionReturn(0);
3968965ea79SLois Curfman McInnes }
3978965ea79SLois Curfman McInnes 
3989371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
39939ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
400637a0070SStefano Zampini   PetscInt      i, len, *lrows;
401637a0070SStefano Zampini 
402637a0070SStefano Zampini   PetscFunctionBegin;
403637a0070SStefano Zampini   /* get locally owned rows */
4049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
405637a0070SStefano Zampini   /* fix right hand side if needed */
406637a0070SStefano Zampini   if (x && b) {
40797b48c8fSBarry Smith     const PetscScalar *xx;
40897b48c8fSBarry Smith     PetscScalar       *bb;
4098965ea79SLois Curfman McInnes 
4109566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
4119566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(b, &bb));
412637a0070SStefano Zampini     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
4139566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
4149566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(b, &bb));
41597b48c8fSBarry Smith   }
4169566063dSJacob Faibussowitsch   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
417e2eb51b1SBarry Smith   if (diag != 0.0) {
418637a0070SStefano Zampini     Vec d;
419b9679d65SBarry Smith 
4209566063dSJacob Faibussowitsch     PetscCall(MatCreateVecs(A, NULL, &d));
4219566063dSJacob Faibussowitsch     PetscCall(VecSet(d, diag));
4229566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
4239566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&d));
424b9679d65SBarry Smith   }
4259566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
4263a40ed3dSBarry Smith   PetscFunctionReturn(0);
4278965ea79SLois Curfman McInnes }
4288965ea79SLois Curfman McInnes 
429cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
430cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
431cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
432cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
433cc2e6a90SBarry Smith 
4349371c9d4SSatish Balay PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
43539ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
436637a0070SStefano Zampini   const PetscScalar *ax;
437637a0070SStefano Zampini   PetscScalar       *ay;
438d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
439c456f294SBarry Smith 
4403a40ed3dSBarry Smith   PetscFunctionBegin;
4416f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4449566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4459566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4489566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
4493a40ed3dSBarry Smith   PetscFunctionReturn(0);
4508965ea79SLois Curfman McInnes }
4518965ea79SLois Curfman McInnes 
4529371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
45339ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
454637a0070SStefano Zampini   const PetscScalar *ax;
455637a0070SStefano Zampini   PetscScalar       *ay;
456d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
457c456f294SBarry Smith 
4583a40ed3dSBarry Smith   PetscFunctionBegin;
4596f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4629566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4639566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4669566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
4673a40ed3dSBarry Smith   PetscFunctionReturn(0);
4688965ea79SLois Curfman McInnes }
4698965ea79SLois Curfman McInnes 
4709371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
471096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
472637a0070SStefano Zampini   const PetscScalar *ax;
473637a0070SStefano Zampini   PetscScalar       *ay;
474d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
475096963f5SLois Curfman McInnes 
4763a40ed3dSBarry Smith   PetscFunctionBegin;
4776f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
4789566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
4799566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
4809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
4819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
4829566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
4839566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
4849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
4859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
4863a40ed3dSBarry Smith   PetscFunctionReturn(0);
487096963f5SLois Curfman McInnes }
488096963f5SLois Curfman McInnes 
4899371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
490096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
491637a0070SStefano Zampini   const PetscScalar *ax;
492637a0070SStefano Zampini   PetscScalar       *ay;
493d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
494096963f5SLois Curfman McInnes 
4953a40ed3dSBarry Smith   PetscFunctionBegin;
4966f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
4979566063dSJacob Faibussowitsch   PetscCall(VecCopy(yy, zz));
4989566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
4999566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5009566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
5019566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5029566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
5053a40ed3dSBarry Smith   PetscFunctionReturn(0);
506096963f5SLois Curfman McInnes }
507096963f5SLois Curfman McInnes 
5089371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
50939ddd567SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
510637a0070SStefano Zampini   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
51187828ca2SBarry Smith   PetscScalar       *x, zero = 0.0;
512637a0070SStefano Zampini   const PetscScalar *av;
513ed3cc1f0SBarry Smith 
5143a40ed3dSBarry Smith   PetscFunctionBegin;
5159566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
5169566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
5179566063dSJacob Faibussowitsch   PetscCall(VecGetSize(v, &n));
51808401ef6SPierre Jolivet   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
519d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
520d0f46423SBarry Smith   radd = A->rmap->rstart * m;
5219566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
5229566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
523ad540459SPierre Jolivet   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
5249566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
5259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
5263a40ed3dSBarry Smith   PetscFunctionReturn(0);
5278965ea79SLois Curfman McInnes }
5288965ea79SLois Curfman McInnes 
5299371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIDense(Mat mat) {
5303501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
531ed3cc1f0SBarry Smith 
5323a40ed3dSBarry Smith   PetscFunctionBegin;
533aa482453SBarry Smith #if defined(PETSC_USE_LOG)
534c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
5358965ea79SLois Curfman McInnes #endif
5369566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
53728b400f6SJacob Faibussowitsch   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
53828b400f6SJacob Faibussowitsch   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
5399566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->A));
5409566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->lvec));
5419566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mdn->Mvctx));
5429566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->cvec));
5439566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->cmat));
54401b82886SBarry Smith 
5459566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
5469566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
5478baccfbdSHong Zhang 
5489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
5499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
5509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
5519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
5529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
5539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
5549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
5559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
5569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
5579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
5589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
5599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
5609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
5618baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
5638baccfbdSHong Zhang #endif
564d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
5659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
566d24d4204SJose E. Roman #endif
5679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
5689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
5699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
5706718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
5719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
5729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
5732e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
5742e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
5752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
5762e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
5772e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
5782e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
5792e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
5802e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
5812e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
5822e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
5832e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
5842e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
5852e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
5862e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
5872e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
5886718818eSStefano Zampini #endif
5899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
5909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
5919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
5929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
5939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
5949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
5959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
5969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
5979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
5989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
599f8103e6bSStefano Zampini 
600f8103e6bSStefano Zampini   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
6013a40ed3dSBarry Smith   PetscFunctionReturn(0);
6028965ea79SLois Curfman McInnes }
60339ddd567SLois Curfman McInnes 
60452c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
60552c5f739Sprj- 
6069804daf3SBarry Smith #include <petscdraw.h>
6079371c9d4SSatish Balay static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
60839ddd567SLois Curfman McInnes   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
609616b8fbbSStefano Zampini   PetscMPIInt       rank;
61019fd82e9SBarry Smith   PetscViewerType   vtype;
611ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
612b0a32e0cSBarry Smith   PetscViewer       sviewer;
613f3ef73ceSBarry Smith   PetscViewerFormat format;
6148965ea79SLois Curfman McInnes 
6153a40ed3dSBarry Smith   PetscFunctionBegin;
6169566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
6179566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
6189566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
61932077d6dSBarry Smith   if (iascii) {
6209566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetType(viewer, &vtype));
6219566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
622456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6234e220ebcSLois Curfman McInnes       MatInfo info;
6249566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
6259566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
6269371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
6279371c9d4SSatish Balay                                                    (PetscInt)info.memory));
6289566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
6299566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
6306f08ad05SPierre Jolivet       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
6313a40ed3dSBarry Smith       PetscFunctionReturn(0);
632fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6333a40ed3dSBarry Smith       PetscFunctionReturn(0);
6348965ea79SLois Curfman McInnes     }
635f1af5d2fSBarry Smith   } else if (isdraw) {
636b0a32e0cSBarry Smith     PetscDraw draw;
637ace3abfcSBarry Smith     PetscBool isnull;
638f1af5d2fSBarry Smith 
6399566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
6409566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
641f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
642f1af5d2fSBarry Smith   }
64377ed5343SBarry Smith 
6447da1fb6eSBarry Smith   {
6458965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6468965ea79SLois Curfman McInnes     Mat          A;
647d0f46423SBarry Smith     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
648ba8c8a56SBarry Smith     PetscInt    *cols;
649ba8c8a56SBarry Smith     PetscScalar *vals;
6508965ea79SLois Curfman McInnes 
6519566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
652dd400576SPatrick Sanan     if (rank == 0) {
6539566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
6543a40ed3dSBarry Smith     } else {
6559566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
6568965ea79SLois Curfman McInnes     }
6577adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
6589566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPIDENSE));
6599566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
6608965ea79SLois Curfman McInnes 
66139ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
66239ddd567SLois Curfman McInnes        but it's quick for now */
66351022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6642205254eSKarl Rupp 
6652205254eSKarl Rupp     row = mat->rmap->rstart;
6662205254eSKarl Rupp     m   = mdn->A->rmap->n;
6678965ea79SLois Curfman McInnes     for (i = 0; i < m; i++) {
6689566063dSJacob Faibussowitsch       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
6699566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
6709566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
67139ddd567SLois Curfman McInnes       row++;
6728965ea79SLois Curfman McInnes     }
6738965ea79SLois Curfman McInnes 
6749566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
6759566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
6769566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
677dd400576SPatrick Sanan     if (rank == 0) {
6789566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
6799566063dSJacob Faibussowitsch       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
6808965ea79SLois Curfman McInnes     }
6819566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
6829566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
6839566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
6848965ea79SLois Curfman McInnes   }
6853a40ed3dSBarry Smith   PetscFunctionReturn(0);
6868965ea79SLois Curfman McInnes }
6878965ea79SLois Curfman McInnes 
6889371c9d4SSatish Balay PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
689ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw, issocket;
6908965ea79SLois Curfman McInnes 
691433994e6SBarry Smith   PetscFunctionBegin;
6929566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
6939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
6949566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
6959566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
6960f5bd95cSBarry Smith 
69732077d6dSBarry Smith   if (iascii || issocket || isdraw) {
6989566063dSJacob Faibussowitsch     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
6991baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
7003a40ed3dSBarry Smith   PetscFunctionReturn(0);
7018965ea79SLois Curfman McInnes }
7028965ea79SLois Curfman McInnes 
7039371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
7043501a2bdSLois Curfman McInnes   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
7053501a2bdSLois Curfman McInnes   Mat            mdn = mat->A;
7063966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
7078965ea79SLois Curfman McInnes 
7083a40ed3dSBarry Smith   PetscFunctionBegin;
7094e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7102205254eSKarl Rupp 
7119566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
7122205254eSKarl Rupp 
7139371c9d4SSatish Balay   isend[0] = info->nz_used;
7149371c9d4SSatish Balay   isend[1] = info->nz_allocated;
7159371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
7169371c9d4SSatish Balay   isend[3] = info->memory;
7179371c9d4SSatish Balay   isend[4] = info->mallocs;
7188965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7194e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7204e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7214e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7224e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7234e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7248965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
7251c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
7262205254eSKarl Rupp 
7274e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7284e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7294e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7304e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7314e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7328965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
7331c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
7342205254eSKarl Rupp 
7354e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7364e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7374e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7384e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7394e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7408965ea79SLois Curfman McInnes   }
7414e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7424e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7434e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7443a40ed3dSBarry Smith   PetscFunctionReturn(0);
7458965ea79SLois Curfman McInnes }
7468965ea79SLois Curfman McInnes 
7479371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
74839ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
7498965ea79SLois Curfman McInnes 
7503a40ed3dSBarry Smith   PetscFunctionBegin;
75112c028f9SKris Buschelman   switch (op) {
752512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
75312c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
75412c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
75543674050SBarry Smith     MatCheckPreallocated(A, 1);
7569566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
75712c028f9SKris Buschelman     break;
75812c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
75943674050SBarry Smith     MatCheckPreallocated(A, 1);
7604e0d8c25SBarry Smith     a->roworiented = flg;
7619566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
76212c028f9SKris Buschelman     break;
7638c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
76413fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
76512c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
7669371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
7679371c9d4SSatish Balay   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
76877e54ba9SKris Buschelman   case MAT_SYMMETRIC:
76977e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7709a4540c5SBarry Smith   case MAT_HERMITIAN:
7719a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
772b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
773b94d7dedSBarry Smith   case MAT_SPD:
774600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
7755d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
776b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
777b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
7789566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
77977e54ba9SKris Buschelman     break;
7809371c9d4SSatish Balay   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
7813a40ed3dSBarry Smith   }
7823a40ed3dSBarry Smith   PetscFunctionReturn(0);
7838965ea79SLois Curfman McInnes }
7848965ea79SLois Curfman McInnes 
7859371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
7865b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
787637a0070SStefano Zampini   const PetscScalar *l;
788637a0070SStefano Zampini   PetscScalar        x, *v, *vv, *r;
789637a0070SStefano Zampini   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
7905b2fa520SLois Curfman McInnes 
7915b2fa520SLois Curfman McInnes   PetscFunctionBegin;
7929566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(mdn->A, &vv));
7939566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mdn->A, &lda));
7949566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &s2, &s3));
7955b2fa520SLois Curfman McInnes   if (ll) {
7969566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s2a));
79708401ef6SPierre Jolivet     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
7989566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(ll, &l));
7995b2fa520SLois Curfman McInnes     for (i = 0; i < m; i++) {
8005b2fa520SLois Curfman McInnes       x = l[i];
801637a0070SStefano Zampini       v = vv + i;
8029371c9d4SSatish Balay       for (j = 0; j < n; j++) {
8039371c9d4SSatish Balay         (*v) *= x;
8049371c9d4SSatish Balay         v += lda;
8059371c9d4SSatish Balay       }
8065b2fa520SLois Curfman McInnes     }
8079566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(ll, &l));
8089566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
8095b2fa520SLois Curfman McInnes   }
8105b2fa520SLois Curfman McInnes   if (rr) {
811637a0070SStefano Zampini     const PetscScalar *ar;
812637a0070SStefano Zampini 
8139566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s3a));
81408401ef6SPierre Jolivet     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
8159566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(rr, &ar));
8166f08ad05SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
8179566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mdn->lvec, &r));
8189566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
8199566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
8209566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(rr, &ar));
8215b2fa520SLois Curfman McInnes     for (i = 0; i < n; i++) {
8225b2fa520SLois Curfman McInnes       x = r[i];
823637a0070SStefano Zampini       v = vv + i * lda;
8242205254eSKarl Rupp       for (j = 0; j < m; j++) (*v++) *= x;
8255b2fa520SLois Curfman McInnes     }
8269566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mdn->lvec, &r));
8279566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
8285b2fa520SLois Curfman McInnes   }
8299566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
8305b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8315b2fa520SLois Curfman McInnes }
8325b2fa520SLois Curfman McInnes 
8339371c9d4SSatish Balay PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
8343501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
83513f74950SBarry Smith   PetscInt           i, j;
836616b8fbbSStefano Zampini   PetscMPIInt        size;
837329f5518SBarry Smith   PetscReal          sum = 0.0;
838637a0070SStefano Zampini   const PetscScalar *av, *v;
8393501a2bdSLois Curfman McInnes 
8403a40ed3dSBarry Smith   PetscFunctionBegin;
8419566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
842637a0070SStefano Zampini   v = av;
8439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
844616b8fbbSStefano Zampini   if (size == 1) {
8459566063dSJacob Faibussowitsch     PetscCall(MatNorm(mdn->A, type, nrm));
8463501a2bdSLois Curfman McInnes   } else {
8473501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
848d0f46423SBarry Smith       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
8499371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
8509371c9d4SSatish Balay         v++;
8513501a2bdSLois Curfman McInnes       }
8521c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
8538f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
8549566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
8553a40ed3dSBarry Smith     } else if (type == NORM_1) {
856329f5518SBarry Smith       PetscReal *tmp, *tmp2;
8579566063dSJacob Faibussowitsch       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
858064f8208SBarry Smith       *nrm = 0.0;
859637a0070SStefano Zampini       v    = av;
860d0f46423SBarry Smith       for (j = 0; j < mdn->A->cmap->n; j++) {
861d0f46423SBarry Smith         for (i = 0; i < mdn->A->rmap->n; i++) {
8629371c9d4SSatish Balay           tmp[j] += PetscAbsScalar(*v);
8639371c9d4SSatish Balay           v++;
8643501a2bdSLois Curfman McInnes         }
8653501a2bdSLois Curfman McInnes       }
8661c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
867d0f46423SBarry Smith       for (j = 0; j < A->cmap->N; j++) {
868064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8693501a2bdSLois Curfman McInnes       }
8709566063dSJacob Faibussowitsch       PetscCall(PetscFree2(tmp, tmp2));
8719566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
8723a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
873329f5518SBarry Smith       PetscReal ntemp;
8749566063dSJacob Faibussowitsch       PetscCall(MatNorm(mdn->A, type, &ntemp));
8751c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
876ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
8773501a2bdSLois Curfman McInnes   }
8789566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
8793a40ed3dSBarry Smith   PetscFunctionReturn(0);
8803501a2bdSLois Curfman McInnes }
8813501a2bdSLois Curfman McInnes 
8829371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
8833501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
8843501a2bdSLois Curfman McInnes   Mat           B;
885d0f46423SBarry Smith   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
886637a0070SStefano Zampini   PetscInt      j, i, lda;
88787828ca2SBarry Smith   PetscScalar  *v;
8883501a2bdSLois Curfman McInnes 
8893a40ed3dSBarry Smith   PetscFunctionBegin;
8907fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
891cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
8929566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
8939566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
8949566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
8959566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
896637a0070SStefano Zampini   } else B = *matout;
8973501a2bdSLois Curfman McInnes 
8989371c9d4SSatish Balay   m = a->A->rmap->n;
8999371c9d4SSatish Balay   n = a->A->cmap->n;
9009566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
9019566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
9029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rwork));
9033501a2bdSLois Curfman McInnes   for (i = 0; i < m; i++) rwork[i] = rstart + i;
9041acff37aSSatish Balay   for (j = 0; j < n; j++) {
9059566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
906637a0070SStefano Zampini     v += lda;
9073501a2bdSLois Curfman McInnes   }
9089566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
9099566063dSJacob Faibussowitsch   PetscCall(PetscFree(rwork));
9109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
9119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
912cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9133501a2bdSLois Curfman McInnes     *matout = B;
9143501a2bdSLois Curfman McInnes   } else {
9159566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
9163501a2bdSLois Curfman McInnes   }
9173a40ed3dSBarry Smith   PetscFunctionReturn(0);
918096963f5SLois Curfman McInnes }
919096963f5SLois Curfman McInnes 
9206849ba73SBarry Smith static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
92152c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
9228965ea79SLois Curfman McInnes 
9239371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIDense(Mat A) {
924273d9f13SBarry Smith   PetscFunctionBegin;
9259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
9269566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
92748a46eb9SPierre Jolivet   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
928273d9f13SBarry Smith   PetscFunctionReturn(0);
929273d9f13SBarry Smith }
930273d9f13SBarry Smith 
9319371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
932488007eeSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
933488007eeSBarry Smith 
934488007eeSBarry Smith   PetscFunctionBegin;
9359566063dSJacob Faibussowitsch   PetscCall(MatAXPY(A->A, alpha, B->A, str));
936488007eeSBarry Smith   PetscFunctionReturn(0);
937488007eeSBarry Smith }
938488007eeSBarry Smith 
9399371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIDense(Mat mat) {
940ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
941ba337c44SJed Brown 
942ba337c44SJed Brown   PetscFunctionBegin;
9439566063dSJacob Faibussowitsch   PetscCall(MatConjugate(a->A));
944ba337c44SJed Brown   PetscFunctionReturn(0);
945ba337c44SJed Brown }
946ba337c44SJed Brown 
9479371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIDense(Mat A) {
948ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
949ba337c44SJed Brown 
950ba337c44SJed Brown   PetscFunctionBegin;
9519566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
952ba337c44SJed Brown   PetscFunctionReturn(0);
953ba337c44SJed Brown }
954ba337c44SJed Brown 
9559371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
956ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
957ba337c44SJed Brown 
958ba337c44SJed Brown   PetscFunctionBegin;
9599566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
960ba337c44SJed Brown   PetscFunctionReturn(0);
961ba337c44SJed Brown }
962ba337c44SJed Brown 
9639371c9d4SSatish Balay static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
964637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
96549a6ff4bSBarry Smith 
96649a6ff4bSBarry Smith   PetscFunctionBegin;
96728b400f6SJacob Faibussowitsch   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
96828b400f6SJacob Faibussowitsch   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
9699566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
97049a6ff4bSBarry Smith   PetscFunctionReturn(0);
97149a6ff4bSBarry Smith }
97249a6ff4bSBarry Smith 
973857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
97452c5f739Sprj- 
9759371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
976a873a8cdSSam Reynolds   PetscInt      i, m, n;
9770716a85fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
9780716a85fSBarry Smith   PetscReal    *work;
9790716a85fSBarry Smith 
9800716a85fSBarry Smith   PetscFunctionBegin;
9819566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
9829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &work));
983857cbf51SRichard Tran Mills   if (type == REDUCTION_MEAN_REALPART) {
9849566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
985857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
9869566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
987a873a8cdSSam Reynolds   } else {
9889566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
989a873a8cdSSam Reynolds   }
990857cbf51SRichard Tran Mills   if (type == NORM_2) {
9910716a85fSBarry Smith     for (i = 0; i < n; i++) work[i] *= work[i];
9920716a85fSBarry Smith   }
993857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
9941c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
9950716a85fSBarry Smith   } else {
9961c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
9970716a85fSBarry Smith   }
9989566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
999857cbf51SRichard Tran Mills   if (type == NORM_2) {
1000a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1001857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1002a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
10030716a85fSBarry Smith   }
10040716a85fSBarry Smith   PetscFunctionReturn(0);
10050716a85fSBarry Smith }
10060716a85fSBarry Smith 
1007637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
10089371c9d4SSatish Balay PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
1009126e4e93SJose E. Roman   PetscScalar *da;
1010126e4e93SJose E. Roman   PetscInt     lda;
1011126e4e93SJose E. Roman 
1012126e4e93SJose E. Roman   PetscFunctionBegin;
1013126e4e93SJose E. Roman   PetscCall(MatDenseCUDAGetArray(A, &da));
1014126e4e93SJose E. Roman   PetscCall(MatDenseGetLDA(A, &lda));
1015126e4e93SJose E. Roman   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1016126e4e93SJose E. Roman   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1017126e4e93SJose E. Roman   PetscCall(MatDenseCUDARestoreArray(A, &da));
1018126e4e93SJose E. Roman   PetscFunctionReturn(0);
1019126e4e93SJose E. Roman }
1020126e4e93SJose E. Roman 
10219371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10226947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10236947451fSStefano Zampini   PetscInt      lda;
10246947451fSStefano Zampini 
10256947451fSStefano Zampini   PetscFunctionBegin;
102628b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
102728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1028*4dfa11a4SJacob Faibussowitsch   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
10296947451fSStefano Zampini   a->vecinuse = col + 1;
10309566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10319566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
10329566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
10336947451fSStefano Zampini   *v = a->cvec;
10346947451fSStefano Zampini   PetscFunctionReturn(0);
10356947451fSStefano Zampini }
10366947451fSStefano Zampini 
10379371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10386947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10396947451fSStefano Zampini 
10406947451fSStefano Zampini   PetscFunctionBegin;
104128b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
104228b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
10436947451fSStefano Zampini   a->vecinuse = 0;
10449566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
10459566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1046742765d3SMatthew Knepley   if (v) *v = NULL;
10476947451fSStefano Zampini   PetscFunctionReturn(0);
10486947451fSStefano Zampini }
10496947451fSStefano Zampini 
10509371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10516947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10526947451fSStefano Zampini   PetscInt      lda;
10536947451fSStefano Zampini 
10546947451fSStefano Zampini   PetscFunctionBegin;
105528b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
105628b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1057*4dfa11a4SJacob Faibussowitsch   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
10586947451fSStefano Zampini   a->vecinuse = col + 1;
10599566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10609566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
10619566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
10629566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
10636947451fSStefano Zampini   *v = a->cvec;
10646947451fSStefano Zampini   PetscFunctionReturn(0);
10656947451fSStefano Zampini }
10666947451fSStefano Zampini 
10679371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10686947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10696947451fSStefano Zampini 
10706947451fSStefano Zampini   PetscFunctionBegin;
107128b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
107228b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
10736947451fSStefano Zampini   a->vecinuse = 0;
10749566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
10759566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
10769566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1077742765d3SMatthew Knepley   if (v) *v = NULL;
10786947451fSStefano Zampini   PetscFunctionReturn(0);
10796947451fSStefano Zampini }
10806947451fSStefano Zampini 
10819371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10826947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10836947451fSStefano Zampini   PetscInt      lda;
10846947451fSStefano Zampini 
10856947451fSStefano Zampini   PetscFunctionBegin;
108628b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
108728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1088*4dfa11a4SJacob Faibussowitsch   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
10896947451fSStefano Zampini   a->vecinuse = col + 1;
10909566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10919566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
10929566063dSJacob Faibussowitsch   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
10936947451fSStefano Zampini   *v = a->cvec;
10946947451fSStefano Zampini   PetscFunctionReturn(0);
10956947451fSStefano Zampini }
10966947451fSStefano Zampini 
10979371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
10986947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10996947451fSStefano Zampini 
11006947451fSStefano Zampini   PetscFunctionBegin;
110128b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
110228b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
11036947451fSStefano Zampini   a->vecinuse = 0;
11049566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
11059566063dSJacob Faibussowitsch   PetscCall(VecCUDAResetArray(a->cvec));
1106742765d3SMatthew Knepley   if (v) *v = NULL;
11076947451fSStefano Zampini   PetscFunctionReturn(0);
11086947451fSStefano Zampini }
11096947451fSStefano Zampini 
11109371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1111637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1112637a0070SStefano Zampini 
1113637a0070SStefano Zampini   PetscFunctionBegin;
111428b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
111528b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11169566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1117637a0070SStefano Zampini   PetscFunctionReturn(0);
1118637a0070SStefano Zampini }
1119637a0070SStefano Zampini 
11209371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
1121637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1122637a0070SStefano Zampini 
1123637a0070SStefano Zampini   PetscFunctionBegin;
112428b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
112528b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11269566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAResetArray(l->A));
1127637a0070SStefano Zampini   PetscFunctionReturn(0);
1128637a0070SStefano Zampini }
1129637a0070SStefano Zampini 
11309371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1131d5ea218eSStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1132d5ea218eSStefano Zampini 
1133d5ea218eSStefano Zampini   PetscFunctionBegin;
113428b400f6SJacob Faibussowitsch   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
113528b400f6SJacob Faibussowitsch   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
11369566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1137d5ea218eSStefano Zampini   PetscFunctionReturn(0);
1138d5ea218eSStefano Zampini }
1139d5ea218eSStefano Zampini 
11409371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1141637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1142637a0070SStefano Zampini 
1143637a0070SStefano Zampini   PetscFunctionBegin;
11449566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1145637a0070SStefano Zampini   PetscFunctionReturn(0);
1146637a0070SStefano Zampini }
1147637a0070SStefano Zampini 
11489371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1149637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1150637a0070SStefano Zampini 
1151637a0070SStefano Zampini   PetscFunctionBegin;
11529566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1153637a0070SStefano Zampini   PetscFunctionReturn(0);
1154637a0070SStefano Zampini }
1155637a0070SStefano Zampini 
11569371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1157637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1158637a0070SStefano Zampini 
1159637a0070SStefano Zampini   PetscFunctionBegin;
11609566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1161637a0070SStefano Zampini   PetscFunctionReturn(0);
1162637a0070SStefano Zampini }
1163637a0070SStefano Zampini 
11649371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1165637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1166637a0070SStefano Zampini 
1167637a0070SStefano Zampini   PetscFunctionBegin;
11689566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1169637a0070SStefano Zampini   PetscFunctionReturn(0);
1170637a0070SStefano Zampini }
1171637a0070SStefano Zampini 
11729371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1173637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1174637a0070SStefano Zampini 
1175637a0070SStefano Zampini   PetscFunctionBegin;
11769566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDAGetArray(l->A, a));
1177637a0070SStefano Zampini   PetscFunctionReturn(0);
1178637a0070SStefano Zampini }
1179637a0070SStefano Zampini 
11809371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1181637a0070SStefano Zampini   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1182637a0070SStefano Zampini 
1183637a0070SStefano Zampini   PetscFunctionBegin;
11849566063dSJacob Faibussowitsch   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1185637a0070SStefano Zampini   PetscFunctionReturn(0);
1186637a0070SStefano Zampini }
1187637a0070SStefano Zampini 
11886947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
11896947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
11906947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
11916947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
11926947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
11936947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
11945ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
11956947451fSStefano Zampini 
11969371c9d4SSatish Balay static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
1197637a0070SStefano Zampini   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1198637a0070SStefano Zampini 
1199637a0070SStefano Zampini   PetscFunctionBegin;
120028b400f6SJacob Faibussowitsch   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
120128b400f6SJacob Faibussowitsch   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
12021baa6e33SBarry Smith   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1203637a0070SStefano Zampini   mat->boundtocpu = bind;
12046947451fSStefano Zampini   if (!bind) {
12056947451fSStefano Zampini     PetscBool iscuda;
12066947451fSStefano Zampini 
12073faff063SStefano Zampini     PetscCall(PetscFree(mat->defaultrandtype));
12083faff063SStefano Zampini     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
12099566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
121048a46eb9SPierre Jolivet     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
12119566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
121248a46eb9SPierre Jolivet     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
12139566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
12149566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
12159566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
12169566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
12179566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
12189566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1219126e4e93SJose E. Roman     mat->ops->shift = MatShift_MPIDenseCUDA;
12206947451fSStefano Zampini   } else {
12213faff063SStefano Zampini     PetscCall(PetscFree(mat->defaultrandtype));
12223faff063SStefano Zampini     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
12239566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
12249566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
12259566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
12269566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
12279566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
12289566063dSJacob Faibussowitsch     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1229126e4e93SJose E. Roman     mat->ops->shift = MatShift_MPIDense;
1230616b8fbbSStefano Zampini   }
12311baa6e33SBarry Smith   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1232637a0070SStefano Zampini   PetscFunctionReturn(0);
1233637a0070SStefano Zampini }
1234637a0070SStefano Zampini 
12359371c9d4SSatish Balay PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
1236637a0070SStefano Zampini   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1237637a0070SStefano Zampini   PetscBool     iscuda;
1238637a0070SStefano Zampini 
1239637a0070SStefano Zampini   PetscFunctionBegin;
1240d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
12419566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1242637a0070SStefano Zampini   if (!iscuda) PetscFunctionReturn(0);
12439566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
12449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
1245637a0070SStefano Zampini   if (!d->A) {
12469566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
12479566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1248637a0070SStefano Zampini   }
12499566063dSJacob Faibussowitsch   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
12509566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1251637a0070SStefano Zampini   A->preallocated = PETSC_TRUE;
12526f08ad05SPierre Jolivet   A->assembled    = PETSC_TRUE;
1253637a0070SStefano Zampini   PetscFunctionReturn(0);
1254637a0070SStefano Zampini }
1255637a0070SStefano Zampini #endif
1256637a0070SStefano Zampini 
12579371c9d4SSatish Balay static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
125873a71a0fSBarry Smith   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
125973a71a0fSBarry Smith 
126073a71a0fSBarry Smith   PetscFunctionBegin;
12619566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(d->A, rctx));
12623faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
12633faff063SStefano Zampini   x->offloadmask = d->A->offloadmask;
12643faff063SStefano Zampini #endif
126573a71a0fSBarry Smith   PetscFunctionReturn(0);
126673a71a0fSBarry Smith }
126773a71a0fSBarry Smith 
12689371c9d4SSatish Balay static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
12693b49f96aSBarry Smith   PetscFunctionBegin;
12703b49f96aSBarry Smith   *missing = PETSC_FALSE;
12713b49f96aSBarry Smith   PetscFunctionReturn(0);
12723b49f96aSBarry Smith }
12733b49f96aSBarry Smith 
12744222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1275cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12766718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1277a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12786718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
12796718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1280cc48ffa7SToby Isaac 
12818965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
128209dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
128309dc0095SBarry Smith                                        MatGetRow_MPIDense,
128409dc0095SBarry Smith                                        MatRestoreRow_MPIDense,
128509dc0095SBarry Smith                                        MatMult_MPIDense,
128697304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPIDense,
12877c922b88SBarry Smith                                        MatMultTranspose_MPIDense,
12887c922b88SBarry Smith                                        MatMultTransposeAdd_MPIDense,
1289f4259b30SLisandro Dalcin                                        NULL,
1290f4259b30SLisandro Dalcin                                        NULL,
1291f4259b30SLisandro Dalcin                                        NULL,
1292f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1293f4259b30SLisandro Dalcin                                        NULL,
1294f4259b30SLisandro Dalcin                                        NULL,
1295f4259b30SLisandro Dalcin                                        NULL,
129609dc0095SBarry Smith                                        MatTranspose_MPIDense,
129797304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPIDense,
12986e4ee0c6SHong Zhang                                        MatEqual_MPIDense,
129909dc0095SBarry Smith                                        MatGetDiagonal_MPIDense,
13005b2fa520SLois Curfman McInnes                                        MatDiagonalScale_MPIDense,
130109dc0095SBarry Smith                                        MatNorm_MPIDense,
130297304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPIDense,
130309dc0095SBarry Smith                                        MatAssemblyEnd_MPIDense,
130409dc0095SBarry Smith                                        MatSetOption_MPIDense,
130509dc0095SBarry Smith                                        MatZeroEntries_MPIDense,
1306d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_MPIDense,
1307f4259b30SLisandro Dalcin                                        NULL,
1308f4259b30SLisandro Dalcin                                        NULL,
1309f4259b30SLisandro Dalcin                                        NULL,
1310f4259b30SLisandro Dalcin                                        NULL,
13114994cf47SJed Brown                                        /* 29*/ MatSetUp_MPIDense,
1312f4259b30SLisandro Dalcin                                        NULL,
1313f4259b30SLisandro Dalcin                                        NULL,
1314c56a70eeSBarry Smith                                        MatGetDiagonalBlock_MPIDense,
1315f4259b30SLisandro Dalcin                                        NULL,
1316d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPIDense,
1317f4259b30SLisandro Dalcin                                        NULL,
1318f4259b30SLisandro Dalcin                                        NULL,
1319f4259b30SLisandro Dalcin                                        NULL,
1320f4259b30SLisandro Dalcin                                        NULL,
1321d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPIDense,
13227dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIDense,
1323f4259b30SLisandro Dalcin                                        NULL,
132409dc0095SBarry Smith                                        MatGetValues_MPIDense,
1325a76f77c3SStefano Zampini                                        MatCopy_MPIDense,
1326f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
132709dc0095SBarry Smith                                        MatScale_MPIDense,
13282f605a99SJose E. Roman                                        MatShift_MPIDense,
1329f4259b30SLisandro Dalcin                                        NULL,
1330f4259b30SLisandro Dalcin                                        NULL,
133173a71a0fSBarry Smith                                        /* 49*/ MatSetRandom_MPIDense,
1332f4259b30SLisandro Dalcin                                        NULL,
1333f4259b30SLisandro Dalcin                                        NULL,
1334f4259b30SLisandro Dalcin                                        NULL,
1335f4259b30SLisandro Dalcin                                        NULL,
1336f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1337f4259b30SLisandro Dalcin                                        NULL,
1338f4259b30SLisandro Dalcin                                        NULL,
1339f4259b30SLisandro Dalcin                                        NULL,
1340f4259b30SLisandro Dalcin                                        NULL,
13417dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1342b9b97703SBarry Smith                                        MatDestroy_MPIDense,
1343b9b97703SBarry Smith                                        MatView_MPIDense,
1344f4259b30SLisandro Dalcin                                        NULL,
1345f4259b30SLisandro Dalcin                                        NULL,
1346f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1347f4259b30SLisandro Dalcin                                        NULL,
1348f4259b30SLisandro Dalcin                                        NULL,
1349f4259b30SLisandro Dalcin                                        NULL,
1350f4259b30SLisandro Dalcin                                        NULL,
1351f4259b30SLisandro Dalcin                                        /* 69*/ NULL,
1352f4259b30SLisandro Dalcin                                        NULL,
1353f4259b30SLisandro Dalcin                                        NULL,
1354f4259b30SLisandro Dalcin                                        NULL,
1355f4259b30SLisandro Dalcin                                        NULL,
1356f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1357f4259b30SLisandro Dalcin                                        NULL,
1358f4259b30SLisandro Dalcin                                        NULL,
1359f4259b30SLisandro Dalcin                                        NULL,
1360f4259b30SLisandro Dalcin                                        NULL,
1361f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1362f4259b30SLisandro Dalcin                                        NULL,
1363f4259b30SLisandro Dalcin                                        NULL,
1364f4259b30SLisandro Dalcin                                        NULL,
13655bba2384SShri Abhyankar                                        /* 83*/ MatLoad_MPIDense,
1366f4259b30SLisandro Dalcin                                        NULL,
1367f4259b30SLisandro Dalcin                                        NULL,
1368f4259b30SLisandro Dalcin                                        NULL,
1369f4259b30SLisandro Dalcin                                        NULL,
1370f4259b30SLisandro Dalcin                                        NULL,
1371f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1372f4259b30SLisandro Dalcin                                        NULL,
1373f4259b30SLisandro Dalcin                                        NULL,
1374f4259b30SLisandro Dalcin                                        NULL,
1375f4259b30SLisandro Dalcin                                        NULL,
1376f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1377f4259b30SLisandro Dalcin                                        NULL,
13786718818eSStefano Zampini                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1379cc48ffa7SToby Isaac                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1380f4259b30SLisandro Dalcin                                        NULL,
13814222ddf1SHong Zhang                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1382f4259b30SLisandro Dalcin                                        NULL,
1383f4259b30SLisandro Dalcin                                        NULL,
1384ba337c44SJed Brown                                        MatConjugate_MPIDense,
1385f4259b30SLisandro Dalcin                                        NULL,
1386f4259b30SLisandro Dalcin                                        /*104*/ NULL,
1387ba337c44SJed Brown                                        MatRealPart_MPIDense,
1388ba337c44SJed Brown                                        MatImaginaryPart_MPIDense,
1389f4259b30SLisandro Dalcin                                        NULL,
1390f4259b30SLisandro Dalcin                                        NULL,
1391f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1392f4259b30SLisandro Dalcin                                        NULL,
1393f4259b30SLisandro Dalcin                                        NULL,
139449a6ff4bSBarry Smith                                        MatGetColumnVector_MPIDense,
13953b49f96aSBarry Smith                                        MatMissingDiagonal_MPIDense,
1396f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1397f4259b30SLisandro Dalcin                                        NULL,
1398f4259b30SLisandro Dalcin                                        NULL,
1399f4259b30SLisandro Dalcin                                        NULL,
1400f4259b30SLisandro Dalcin                                        NULL,
1401f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1402f4259b30SLisandro Dalcin                                        NULL,
1403f4259b30SLisandro Dalcin                                        NULL,
1404f4259b30SLisandro Dalcin                                        NULL,
1405f4259b30SLisandro Dalcin                                        NULL,
1406f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1407a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIDense,
1408f4259b30SLisandro Dalcin                                        NULL,
1409f4259b30SLisandro Dalcin                                        NULL,
1410f4259b30SLisandro Dalcin                                        NULL,
1411f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1412f4259b30SLisandro Dalcin                                        NULL,
14136718818eSStefano Zampini                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1414cb20be35SHong Zhang                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1415f4259b30SLisandro Dalcin                                        NULL,
1416f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1417f4259b30SLisandro Dalcin                                        NULL,
1418f4259b30SLisandro Dalcin                                        NULL,
1419f4259b30SLisandro Dalcin                                        NULL,
1420f4259b30SLisandro Dalcin                                        NULL,
1421f4259b30SLisandro Dalcin                                        /*139*/ NULL,
1422f4259b30SLisandro Dalcin                                        NULL,
1423f4259b30SLisandro Dalcin                                        NULL,
1424f4259b30SLisandro Dalcin                                        NULL,
1425f4259b30SLisandro Dalcin                                        NULL,
14264222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1427f4259b30SLisandro Dalcin                                        /*145*/ NULL,
1428f4259b30SLisandro Dalcin                                        NULL,
142999a7f59eSMark Adams                                        NULL,
143099a7f59eSMark Adams                                        NULL,
14317fb60732SBarry Smith                                        NULL,
14329371c9d4SSatish Balay                                        /*150*/ NULL};
14338965ea79SLois Curfman McInnes 
14349371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
1435637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1436637a0070SStefano Zampini   PetscBool     iscuda;
1437a23d5eceSKris Buschelman 
1438a23d5eceSKris Buschelman   PetscFunctionBegin;
143928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
14409566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
14419566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
1442637a0070SStefano Zampini   if (!a->A) {
14439566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
14449566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1445637a0070SStefano Zampini   }
14469566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
14479566063dSJacob Faibussowitsch   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
14489566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
14493faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
14503faff063SStefano Zampini   mat->offloadmask = a->A->offloadmask;
14513faff063SStefano Zampini #endif
1452637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
14536f08ad05SPierre Jolivet   mat->assembled    = PETSC_TRUE;
1454a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1455a23d5eceSKris Buschelman }
1456a23d5eceSKris Buschelman 
14579371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
14584e29119aSPierre Jolivet   Mat B, C;
14594e29119aSPierre Jolivet 
14604e29119aSPierre Jolivet   PetscFunctionBegin;
14619566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
14629566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
14639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&C));
14644e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14654e29119aSPierre Jolivet     C = *newmat;
14664e29119aSPierre Jolivet   } else C = NULL;
14679566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14689566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14694e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14709566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14714e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14724e29119aSPierre Jolivet   PetscFunctionReturn(0);
14734e29119aSPierre Jolivet }
14744e29119aSPierre Jolivet 
14759371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
14764e29119aSPierre Jolivet   Mat B, C;
14774e29119aSPierre Jolivet 
14784e29119aSPierre Jolivet   PetscFunctionBegin;
14799566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLocalMatrix(A, &C));
14809566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
14814e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14824e29119aSPierre Jolivet     C = *newmat;
14834e29119aSPierre Jolivet   } else C = NULL;
14849566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14859566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14864e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14879566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14884e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14894e29119aSPierre Jolivet   PetscFunctionReturn(0);
14904e29119aSPierre Jolivet }
14914e29119aSPierre Jolivet 
149265b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
14939371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
14948ea901baSHong Zhang   Mat          mat_elemental;
149532d7a744SHong Zhang   PetscScalar *v;
149632d7a744SHong Zhang   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
14978ea901baSHong Zhang 
14988baccfbdSHong Zhang   PetscFunctionBegin;
1499378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1500378336b6SHong Zhang     mat_elemental = *newmat;
15019566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(*newmat));
1502378336b6SHong Zhang   } else {
15039566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
15049566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
15059566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
15069566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_elemental));
15079566063dSJacob Faibussowitsch     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1508378336b6SHong Zhang   }
1509378336b6SHong Zhang 
15109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &rows, N, &cols));
151132d7a744SHong Zhang   for (i = 0; i < N; i++) cols[i] = i;
151232d7a744SHong Zhang   for (i = 0; i < m; i++) rows[i] = rstart + i;
15138ea901baSHong Zhang 
1514637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
15159566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(A, &v));
15169566063dSJacob Faibussowitsch   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
15179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
15189566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
15199566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &v));
15209566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, cols));
15218ea901baSHong Zhang 
1522511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
15239566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &mat_elemental));
15248ea901baSHong Zhang   } else {
15258ea901baSHong Zhang     *newmat = mat_elemental;
15268ea901baSHong Zhang   }
15278baccfbdSHong Zhang   PetscFunctionReturn(0);
15288baccfbdSHong Zhang }
152965b80a83SHong Zhang #endif
15308baccfbdSHong Zhang 
15319371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
153286aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
153386aefd0dSHong Zhang 
153486aefd0dSHong Zhang   PetscFunctionBegin;
15359566063dSJacob Faibussowitsch   PetscCall(MatDenseGetColumn(mat->A, col, vals));
153686aefd0dSHong Zhang   PetscFunctionReturn(0);
153786aefd0dSHong Zhang }
153886aefd0dSHong Zhang 
15399371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
154086aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
154186aefd0dSHong Zhang 
154286aefd0dSHong Zhang   PetscFunctionBegin;
15439566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreColumn(mat->A, vals));
154486aefd0dSHong Zhang   PetscFunctionReturn(0);
154586aefd0dSHong Zhang }
154686aefd0dSHong Zhang 
15479371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
154894e2cb23SJakub Kruzik   Mat_MPIDense *mat;
154994e2cb23SJakub Kruzik   PetscInt      m, nloc, N;
155094e2cb23SJakub Kruzik 
155194e2cb23SJakub Kruzik   PetscFunctionBegin;
15529566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
15539566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
155494e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
155594e2cb23SJakub Kruzik     PetscInt sum;
155694e2cb23SJakub Kruzik 
155748a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
155894e2cb23SJakub Kruzik     /* Check sum(n) = N */
15591c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
156008401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
156194e2cb23SJakub Kruzik 
15629566063dSJacob Faibussowitsch     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
15639566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
156494e2cb23SJakub Kruzik   }
156594e2cb23SJakub Kruzik 
156694e2cb23SJakub Kruzik   /* numeric phase */
156794e2cb23SJakub Kruzik   mat = (Mat_MPIDense *)(*outmat)->data;
15689566063dSJacob Faibussowitsch   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
156994e2cb23SJakub Kruzik   PetscFunctionReturn(0);
157094e2cb23SJakub Kruzik }
157194e2cb23SJakub Kruzik 
1572637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
15739371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
1574637a0070SStefano Zampini   Mat           B;
1575637a0070SStefano Zampini   Mat_MPIDense *m;
1576637a0070SStefano Zampini 
1577637a0070SStefano Zampini   PetscFunctionBegin;
1578637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
15799566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1580637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
15819566063dSJacob Faibussowitsch     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1582637a0070SStefano Zampini   }
1583637a0070SStefano Zampini 
1584637a0070SStefano Zampini   B = *newmat;
15859566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
15869566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->defaultvectype));
15879566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
15889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
15899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
15909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
15919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
15929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
15939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
15949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
15959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
15969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
15979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
15989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
15999566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
16009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
16019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
16029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1603637a0070SStefano Zampini   m = (Mat_MPIDense *)(B)->data;
160448a46eb9SPierre Jolivet   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1605637a0070SStefano Zampini   B->ops->bindtocpu = NULL;
1606637a0070SStefano Zampini   B->offloadmask    = PETSC_OFFLOAD_CPU;
1607637a0070SStefano Zampini   PetscFunctionReturn(0);
1608637a0070SStefano Zampini }
1609637a0070SStefano Zampini 
16109371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
1611637a0070SStefano Zampini   Mat           B;
1612637a0070SStefano Zampini   Mat_MPIDense *m;
1613637a0070SStefano Zampini 
1614637a0070SStefano Zampini   PetscFunctionBegin;
1615637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
16169566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1617637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
16189566063dSJacob Faibussowitsch     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1619637a0070SStefano Zampini   }
1620637a0070SStefano Zampini 
1621637a0070SStefano Zampini   B = *newmat;
16229566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->defaultvectype));
16239566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
16249566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
16259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
16269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
16279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
16289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
16299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
16309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
16319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
16329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
16339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
16349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
16359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
16369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
16379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
16389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
1639f5ff9c66SStefano Zampini   m = (Mat_MPIDense *)(B->data);
1640637a0070SStefano Zampini   if (m->A) {
16419566063dSJacob Faibussowitsch     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
1642637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_BOTH;
1643637a0070SStefano Zampini   } else {
1644637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1645637a0070SStefano Zampini   }
16469566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
1647637a0070SStefano Zampini 
1648637a0070SStefano Zampini   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1649637a0070SStefano Zampini   PetscFunctionReturn(0);
1650637a0070SStefano Zampini }
1651637a0070SStefano Zampini #endif
1652637a0070SStefano Zampini 
16539371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
16546947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16556947451fSStefano Zampini   PetscInt      lda;
16566947451fSStefano Zampini 
16576947451fSStefano Zampini   PetscFunctionBegin;
165828b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
165928b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
166048a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
16616947451fSStefano Zampini   a->vecinuse = col + 1;
16629566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
16639566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
16649566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
16656947451fSStefano Zampini   *v = a->cvec;
16666947451fSStefano Zampini   PetscFunctionReturn(0);
16676947451fSStefano Zampini }
16686947451fSStefano Zampini 
16699371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
16706947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16716947451fSStefano Zampini 
16726947451fSStefano Zampini   PetscFunctionBegin;
167328b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
167428b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
16756947451fSStefano Zampini   a->vecinuse = 0;
16769566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
16779566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1678742765d3SMatthew Knepley   if (v) *v = NULL;
16796947451fSStefano Zampini   PetscFunctionReturn(0);
16806947451fSStefano Zampini }
16816947451fSStefano Zampini 
16829371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
16836947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16846947451fSStefano Zampini   PetscInt      lda;
16856947451fSStefano Zampini 
16866947451fSStefano Zampini   PetscFunctionBegin;
168728b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
168828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
168948a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
16906947451fSStefano Zampini   a->vecinuse = col + 1;
16919566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
16929566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
16939566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
16949566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
16956947451fSStefano Zampini   *v = a->cvec;
16966947451fSStefano Zampini   PetscFunctionReturn(0);
16976947451fSStefano Zampini }
16986947451fSStefano Zampini 
16999371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
17006947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17016947451fSStefano Zampini 
17026947451fSStefano Zampini   PetscFunctionBegin;
17035f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
17045f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
17056947451fSStefano Zampini   a->vecinuse = 0;
17069566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
17079566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
17089566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1709742765d3SMatthew Knepley   if (v) *v = NULL;
17106947451fSStefano Zampini   PetscFunctionReturn(0);
17116947451fSStefano Zampini }
17126947451fSStefano Zampini 
17139371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
17146947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17156947451fSStefano Zampini   PetscInt      lda;
17166947451fSStefano Zampini 
17176947451fSStefano Zampini   PetscFunctionBegin;
17185f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
17195f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
172048a46eb9SPierre Jolivet   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
17216947451fSStefano Zampini   a->vecinuse = col + 1;
17229566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
17239566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
17249566063dSJacob Faibussowitsch   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
17256947451fSStefano Zampini   *v = a->cvec;
17266947451fSStefano Zampini   PetscFunctionReturn(0);
17276947451fSStefano Zampini }
17286947451fSStefano Zampini 
17299371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
17306947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
17316947451fSStefano Zampini 
17326947451fSStefano Zampini   PetscFunctionBegin;
17335f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
17345f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
17356947451fSStefano Zampini   a->vecinuse = 0;
17369566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
17379566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1738742765d3SMatthew Knepley   if (v) *v = NULL;
17396947451fSStefano Zampini   PetscFunctionReturn(0);
17406947451fSStefano Zampini }
17416947451fSStefano Zampini 
17429371c9d4SSatish Balay PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
17435ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1744616b8fbbSStefano Zampini   Mat_MPIDense *c;
1745616b8fbbSStefano Zampini   MPI_Comm      comm;
1746a2748737SPierre Jolivet   PetscInt      pbegin, pend;
17475ea7661aSPierre Jolivet 
17485ea7661aSPierre Jolivet   PetscFunctionBegin;
17499566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
17505f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
17515f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1752a2748737SPierre Jolivet   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1753a2748737SPierre Jolivet   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
17545ea7661aSPierre Jolivet   if (!a->cmat) {
17559566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &a->cmat));
17569566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1757a2748737SPierre Jolivet     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1758a2748737SPierre Jolivet     else {
1759a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1760a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1761a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1762a2748737SPierre Jolivet     }
17639566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
17649566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1765a2748737SPierre Jolivet   } else {
1766a2748737SPierre Jolivet     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1767a2748737SPierre Jolivet     if (same && a->cmat->rmap->N != A->rmap->N) {
1768a2748737SPierre Jolivet       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1769a2748737SPierre Jolivet       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1770a2748737SPierre Jolivet     }
1771a2748737SPierre Jolivet     if (!same) {
1772a2748737SPierre Jolivet       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1773a2748737SPierre Jolivet       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1774a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1775a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1776a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1777a2748737SPierre Jolivet     }
1778a2748737SPierre Jolivet     if (cend - cbegin != a->cmat->cmap->N) {
17799566063dSJacob Faibussowitsch       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
17809566063dSJacob Faibussowitsch       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
17819566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
17829566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
17835ea7661aSPierre Jolivet     }
1784a2748737SPierre Jolivet   }
1785616b8fbbSStefano Zampini   c = (Mat_MPIDense *)a->cmat->data;
17865f80ce2aSJacob Faibussowitsch   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1787a2748737SPierre Jolivet   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
17883faff063SStefano Zampini 
1789616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1790616b8fbbSStefano Zampini   a->cmat->assembled    = PETSC_TRUE;
17913faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
17923faff063SStefano Zampini   a->cmat->offloadmask = c->A->offloadmask;
17933faff063SStefano Zampini #endif
17945ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
17955ea7661aSPierre Jolivet   *v          = a->cmat;
17965ea7661aSPierre Jolivet   PetscFunctionReturn(0);
17975ea7661aSPierre Jolivet }
17985ea7661aSPierre Jolivet 
17999371c9d4SSatish Balay PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
18005ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1801616b8fbbSStefano Zampini   Mat_MPIDense *c;
18025ea7661aSPierre Jolivet 
18035ea7661aSPierre Jolivet   PetscFunctionBegin;
18045f80ce2aSJacob Faibussowitsch   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
18055f80ce2aSJacob Faibussowitsch   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
18065f80ce2aSJacob Faibussowitsch   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
18075ea7661aSPierre Jolivet   a->matinuse = 0;
1808616b8fbbSStefano Zampini   c           = (Mat_MPIDense *)a->cmat->data;
18099566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1810742765d3SMatthew Knepley   if (v) *v = NULL;
18113faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
18123faff063SStefano Zampini   A->offloadmask = a->A->offloadmask;
18133faff063SStefano Zampini #endif
18145ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18155ea7661aSPierre Jolivet }
18165ea7661aSPierre Jolivet 
1817002bc6daSRichard Tran Mills /*MC
1818002bc6daSRichard Tran Mills    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1819002bc6daSRichard Tran Mills 
1820002bc6daSRichard Tran Mills    Options Database Keys:
182111a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1822002bc6daSRichard Tran Mills 
1823002bc6daSRichard Tran Mills   Level: beginner
1824002bc6daSRichard Tran Mills 
182511a5261eSBarry Smith .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1826002bc6daSRichard Tran Mills M*/
18279371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
1828273d9f13SBarry Smith   Mat_MPIDense *a;
1829273d9f13SBarry Smith 
1830273d9f13SBarry Smith   PetscFunctionBegin;
1831*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1832b0a32e0cSBarry Smith   mat->data = (void *)a;
18339566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
1834273d9f13SBarry Smith 
1835273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1836273d9f13SBarry Smith 
1837273d9f13SBarry Smith   /* build cache for off array entries formed */
1838273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
18392205254eSKarl Rupp 
18409566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1841273d9f13SBarry Smith 
1842273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1843f4259b30SLisandro Dalcin   a->lvec        = NULL;
1844f4259b30SLisandro Dalcin   a->Mvctx       = NULL;
1845273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1846273d9f13SBarry Smith 
18479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
18489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
18499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
18509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
18519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
18529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
18539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
18549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
18559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
18569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
18579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
18589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
18599566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
18609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
18619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
18629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
18639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
18649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
18659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
18669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
18679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
18688baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
18699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
18708baccfbdSHong Zhang #endif
1871d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
18729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1873d24d4204SJose E. Roman #endif
1874637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
18759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1876637a0070SStefano Zampini #endif
18779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
18789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
18799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
18806718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
18819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
18829566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
18836718818eSStefano Zampini #endif
18848949adfdSHong Zhang 
18859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
18869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
18879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1888273d9f13SBarry Smith   PetscFunctionReturn(0);
1889273d9f13SBarry Smith }
1890273d9f13SBarry Smith 
1891209238afSKris Buschelman /*MC
1892637a0070SStefano Zampini    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1893637a0070SStefano Zampini 
1894637a0070SStefano Zampini    Options Database Keys:
189511a5261eSBarry Smith . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1896637a0070SStefano Zampini 
1897637a0070SStefano Zampini   Level: beginner
1898637a0070SStefano Zampini 
189911a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1900637a0070SStefano Zampini M*/
1901637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1902a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h>
19039371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
1904637a0070SStefano Zampini   PetscFunctionBegin;
19059566063dSJacob Faibussowitsch   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
19069566063dSJacob Faibussowitsch   PetscCall(MatCreate_MPIDense(B));
19079566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
1908637a0070SStefano Zampini   PetscFunctionReturn(0);
1909637a0070SStefano Zampini }
1910637a0070SStefano Zampini #endif
1911637a0070SStefano Zampini 
1912637a0070SStefano Zampini /*MC
1913002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1914209238afSKris Buschelman 
191511a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
191611a5261eSBarry Smith    and `MATMPIDENSE` otherwise.
1917209238afSKris Buschelman 
1918209238afSKris Buschelman    Options Database Keys:
191911a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1920209238afSKris Buschelman 
1921209238afSKris Buschelman   Level: beginner
1922209238afSKris Buschelman 
1923c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
19246947451fSStefano Zampini M*/
19256947451fSStefano Zampini 
19266947451fSStefano Zampini /*MC
19276947451fSStefano Zampini    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
19286947451fSStefano Zampini 
192911a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
193011a5261eSBarry Smith    and `MATMPIDENSECUDA` otherwise.
19316947451fSStefano Zampini 
19326947451fSStefano Zampini    Options Database Keys:
193311a5261eSBarry Smith . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
19346947451fSStefano Zampini 
19356947451fSStefano Zampini   Level: beginner
19366947451fSStefano Zampini 
1937c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1938209238afSKris Buschelman M*/
1939209238afSKris Buschelman 
1940273d9f13SBarry Smith /*@C
1941273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1942273d9f13SBarry Smith 
19438f490ea3SStefano Zampini    Collective
1944273d9f13SBarry Smith 
1945273d9f13SBarry Smith    Input Parameters:
19461c4f3114SJed Brown .  B - the matrix
19470298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1948273d9f13SBarry Smith    to control all matrix memory allocation.
1949273d9f13SBarry Smith 
1950273d9f13SBarry Smith    Notes:
1951273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1952273d9f13SBarry Smith    storage by columns.
1953273d9f13SBarry Smith 
1954273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1955273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
19560298fd71SBarry Smith    set data=NULL.
1957273d9f13SBarry Smith 
1958273d9f13SBarry Smith    Level: intermediate
1959273d9f13SBarry Smith 
196011a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1961273d9f13SBarry Smith @*/
19629371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
1963273d9f13SBarry Smith   PetscFunctionBegin;
1964d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1965cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1966273d9f13SBarry Smith   PetscFunctionReturn(0);
1967273d9f13SBarry Smith }
1968273d9f13SBarry Smith 
1969d3042a70SBarry Smith /*@
197011a5261eSBarry Smith    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1971d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1972d3042a70SBarry Smith    into a matrix
1973d3042a70SBarry Smith 
1974d3042a70SBarry Smith    Not Collective
1975d3042a70SBarry Smith 
1976d3042a70SBarry Smith    Input Parameters:
1977d3042a70SBarry Smith +  mat - the matrix
1978d3042a70SBarry Smith -  array - the array in column major order
1979d3042a70SBarry Smith 
198011a5261eSBarry Smith    Note:
198111a5261eSBarry Smith    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1982d3042a70SBarry Smith    freed when the matrix is destroyed.
1983d3042a70SBarry Smith 
1984d3042a70SBarry Smith    Level: developer
1985d3042a70SBarry Smith 
198611a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
198711a5261eSBarry Smith           `MatDenseReplaceArray()`
1988d3042a70SBarry Smith @*/
19899371c9d4SSatish Balay PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
1990d3042a70SBarry Smith   PetscFunctionBegin;
1991d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1992cac4c232SBarry Smith   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
19939566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
19943faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
1995637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1996637a0070SStefano Zampini #endif
1997d3042a70SBarry Smith   PetscFunctionReturn(0);
1998d3042a70SBarry Smith }
1999d3042a70SBarry Smith 
2000d3042a70SBarry Smith /*@
200111a5261eSBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
2002d3042a70SBarry Smith 
2003d3042a70SBarry Smith    Not Collective
2004d3042a70SBarry Smith 
2005d3042a70SBarry Smith    Input Parameters:
2006d3042a70SBarry Smith .  mat - the matrix
2007d3042a70SBarry Smith 
200811a5261eSBarry Smith    Note:
200911a5261eSBarry Smith    You can only call this after a call to `MatDensePlaceArray()`
2010d3042a70SBarry Smith 
2011d3042a70SBarry Smith    Level: developer
2012d3042a70SBarry Smith 
201311a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2014d3042a70SBarry Smith @*/
20159371c9d4SSatish Balay PetscErrorCode MatDenseResetArray(Mat mat) {
2016d3042a70SBarry Smith   PetscFunctionBegin;
2017d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2018cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
20199566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2020d3042a70SBarry Smith   PetscFunctionReturn(0);
2021d3042a70SBarry Smith }
2022d3042a70SBarry Smith 
2023d5ea218eSStefano Zampini /*@
2024d5ea218eSStefano Zampini    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2025d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2026d5ea218eSStefano Zampini    into a matrix
2027d5ea218eSStefano Zampini 
2028d5ea218eSStefano Zampini    Not Collective
2029d5ea218eSStefano Zampini 
2030d5ea218eSStefano Zampini    Input Parameters:
2031d5ea218eSStefano Zampini +  mat - the matrix
2032d5ea218eSStefano Zampini -  array - the array in column major order
2033d5ea218eSStefano Zampini 
203411a5261eSBarry Smith    Note:
203511a5261eSBarry Smith    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2036d5ea218eSStefano Zampini    freed by the user. It will be freed when the matrix is destroyed.
2037d5ea218eSStefano Zampini 
2038d5ea218eSStefano Zampini    Level: developer
2039d5ea218eSStefano Zampini 
204011a5261eSBarry Smith .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2041d5ea218eSStefano Zampini @*/
20429371c9d4SSatish Balay PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
2043d5ea218eSStefano Zampini   PetscFunctionBegin;
2044d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2045cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
20469566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
20473faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
2048d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2049d5ea218eSStefano Zampini #endif
2050d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2051d5ea218eSStefano Zampini }
2052d5ea218eSStefano Zampini 
2053637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
20548965ea79SLois Curfman McInnes /*@C
205511a5261eSBarry Smith    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2056637a0070SStefano Zampini    array provided by the user. This is useful to avoid copying an array
2057637a0070SStefano Zampini    into a matrix
2058637a0070SStefano Zampini 
2059637a0070SStefano Zampini    Not Collective
2060637a0070SStefano Zampini 
2061637a0070SStefano Zampini    Input Parameters:
2062637a0070SStefano Zampini +  mat - the matrix
2063637a0070SStefano Zampini -  array - the array in column major order
2064637a0070SStefano Zampini 
206511a5261eSBarry Smith    Note:
206611a5261eSBarry Smith    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2067637a0070SStefano Zampini    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2068637a0070SStefano Zampini 
2069637a0070SStefano Zampini    Level: developer
2070637a0070SStefano Zampini 
207111a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2072637a0070SStefano Zampini @*/
20739371c9d4SSatish Balay PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
2074637a0070SStefano Zampini   PetscFunctionBegin;
2075d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2076cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
20779566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2078637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2079637a0070SStefano Zampini   PetscFunctionReturn(0);
2080637a0070SStefano Zampini }
2081637a0070SStefano Zampini 
2082637a0070SStefano Zampini /*@C
208311a5261eSBarry Smith    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`
2084637a0070SStefano Zampini 
2085637a0070SStefano Zampini    Not Collective
2086637a0070SStefano Zampini 
2087637a0070SStefano Zampini    Input Parameters:
2088637a0070SStefano Zampini .  mat - the matrix
2089637a0070SStefano Zampini 
209011a5261eSBarry Smith    Note:
209111a5261eSBarry Smith    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2092637a0070SStefano Zampini 
2093637a0070SStefano Zampini    Level: developer
2094637a0070SStefano Zampini 
209511a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2096637a0070SStefano Zampini @*/
20979371c9d4SSatish Balay PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
2098637a0070SStefano Zampini   PetscFunctionBegin;
2099d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2100cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
21019566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2102637a0070SStefano Zampini   PetscFunctionReturn(0);
2103637a0070SStefano Zampini }
2104637a0070SStefano Zampini 
2105637a0070SStefano Zampini /*@C
210611a5261eSBarry Smith    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2107d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2108d5ea218eSStefano Zampini    into a matrix
2109d5ea218eSStefano Zampini 
2110d5ea218eSStefano Zampini    Not Collective
2111d5ea218eSStefano Zampini 
2112d5ea218eSStefano Zampini    Input Parameters:
2113d5ea218eSStefano Zampini +  mat - the matrix
2114d5ea218eSStefano Zampini -  array - the array in column major order
2115d5ea218eSStefano Zampini 
211611a5261eSBarry Smith    Note:
2117d5ea218eSStefano Zampini    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2118d5ea218eSStefano Zampini    The memory passed in CANNOT be freed by the user. It will be freed
2119d5ea218eSStefano Zampini    when the matrix is destroyed. The array should respect the matrix leading dimension.
2120d5ea218eSStefano Zampini 
2121d5ea218eSStefano Zampini    Level: developer
2122d5ea218eSStefano Zampini 
2123db781477SPatrick Sanan .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2124d5ea218eSStefano Zampini @*/
21259371c9d4SSatish Balay PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
2126d5ea218eSStefano Zampini   PetscFunctionBegin;
2127d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2128cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
21299566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2130d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2131d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2132d5ea218eSStefano Zampini }
2133d5ea218eSStefano Zampini 
2134d5ea218eSStefano Zampini /*@C
213511a5261eSBarry Smith    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2136637a0070SStefano Zampini 
2137637a0070SStefano Zampini    Not Collective
2138637a0070SStefano Zampini 
2139637a0070SStefano Zampini    Input Parameters:
2140637a0070SStefano Zampini .  A - the matrix
2141637a0070SStefano Zampini 
2142637a0070SStefano Zampini    Output Parameters
2143637a0070SStefano Zampini .  array - the GPU array in column major order
2144637a0070SStefano Zampini 
2145637a0070SStefano Zampini    Notes:
214611a5261eSBarry Smith    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
214711a5261eSBarry Smith 
214811a5261eSBarry Smith    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2149637a0070SStefano Zampini 
2150637a0070SStefano Zampini    Level: developer
2151637a0070SStefano Zampini 
215211a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2153637a0070SStefano Zampini @*/
21549371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
2155637a0070SStefano Zampini   PetscFunctionBegin;
2156d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2157cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
21589566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2159637a0070SStefano Zampini   PetscFunctionReturn(0);
2160637a0070SStefano Zampini }
2161637a0070SStefano Zampini 
2162637a0070SStefano Zampini /*@C
216311a5261eSBarry Smith    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2164637a0070SStefano Zampini 
2165637a0070SStefano Zampini    Not Collective
2166637a0070SStefano Zampini 
2167637a0070SStefano Zampini    Input Parameters:
2168637a0070SStefano Zampini +  A - the matrix
2169637a0070SStefano Zampini -  array - the GPU array in column major order
2170637a0070SStefano Zampini 
2171637a0070SStefano Zampini    Level: developer
2172637a0070SStefano Zampini 
217311a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2174637a0070SStefano Zampini @*/
21759371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
2176637a0070SStefano Zampini   PetscFunctionBegin;
2177d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2178cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
21799566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2180637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2181637a0070SStefano Zampini   PetscFunctionReturn(0);
2182637a0070SStefano Zampini }
2183637a0070SStefano Zampini 
2184637a0070SStefano Zampini /*@C
218511a5261eSBarry Smith    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2186637a0070SStefano Zampini 
2187637a0070SStefano Zampini    Not Collective
2188637a0070SStefano Zampini 
2189637a0070SStefano Zampini    Input Parameters:
2190637a0070SStefano Zampini .  A - the matrix
2191637a0070SStefano Zampini 
2192637a0070SStefano Zampini    Output Parameters
2193637a0070SStefano Zampini .  array - the GPU array in column major order
2194637a0070SStefano Zampini 
219511a5261eSBarry Smith    Note:
219611a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2197637a0070SStefano Zampini 
2198637a0070SStefano Zampini    Level: developer
2199637a0070SStefano Zampini 
220011a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2201637a0070SStefano Zampini @*/
22029371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
2203637a0070SStefano Zampini   PetscFunctionBegin;
2204d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2205cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2206637a0070SStefano Zampini   PetscFunctionReturn(0);
2207637a0070SStefano Zampini }
2208637a0070SStefano Zampini 
2209637a0070SStefano Zampini /*@C
221011a5261eSBarry Smith    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2211637a0070SStefano Zampini 
2212637a0070SStefano Zampini    Not Collective
2213637a0070SStefano Zampini 
2214637a0070SStefano Zampini    Input Parameters:
2215637a0070SStefano Zampini +  A - the matrix
2216637a0070SStefano Zampini -  array - the GPU array in column major order
2217637a0070SStefano Zampini 
221811a5261eSBarry Smith    Note:
221911a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2220637a0070SStefano Zampini 
2221637a0070SStefano Zampini    Level: developer
2222637a0070SStefano Zampini 
222311a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2224637a0070SStefano Zampini @*/
22259371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2226637a0070SStefano Zampini   PetscFunctionBegin;
2227cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2228637a0070SStefano Zampini   PetscFunctionReturn(0);
2229637a0070SStefano Zampini }
2230637a0070SStefano Zampini 
2231637a0070SStefano Zampini /*@C
223211a5261eSBarry Smith    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2233637a0070SStefano Zampini 
2234637a0070SStefano Zampini    Not Collective
2235637a0070SStefano Zampini 
2236637a0070SStefano Zampini    Input Parameters:
2237637a0070SStefano Zampini .  A - the matrix
2238637a0070SStefano Zampini 
2239637a0070SStefano Zampini    Output Parameters
2240637a0070SStefano Zampini .  array - the GPU array in column major order
2241637a0070SStefano Zampini 
224211a5261eSBarry Smith    Note:
224311a5261eSBarry Smith    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2244637a0070SStefano Zampini 
2245637a0070SStefano Zampini    Level: developer
2246637a0070SStefano Zampini 
224711a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2248637a0070SStefano Zampini @*/
22499371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
2250637a0070SStefano Zampini   PetscFunctionBegin;
2251d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2252cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
22539566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2254637a0070SStefano Zampini   PetscFunctionReturn(0);
2255637a0070SStefano Zampini }
2256637a0070SStefano Zampini 
2257637a0070SStefano Zampini /*@C
225811a5261eSBarry Smith    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2259637a0070SStefano Zampini 
2260637a0070SStefano Zampini    Not Collective
2261637a0070SStefano Zampini 
2262637a0070SStefano Zampini    Input Parameters:
2263637a0070SStefano Zampini +  A - the matrix
2264637a0070SStefano Zampini -  array - the GPU array in column major order
2265637a0070SStefano Zampini 
2266637a0070SStefano Zampini    Level: developer
2267637a0070SStefano Zampini 
226811a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2269637a0070SStefano Zampini @*/
22709371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
2271637a0070SStefano Zampini   PetscFunctionBegin;
2272d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2273cac4c232SBarry Smith   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
22749566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2275637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2276637a0070SStefano Zampini   PetscFunctionReturn(0);
2277637a0070SStefano Zampini }
2278637a0070SStefano Zampini #endif
2279637a0070SStefano Zampini 
2280637a0070SStefano Zampini /*@C
228111a5261eSBarry Smith    MatCreateDense - Creates a matrix in `MATDENSE` format.
22828965ea79SLois Curfman McInnes 
2283d083f849SBarry Smith    Collective
2284db81eaa0SLois Curfman McInnes 
22858965ea79SLois Curfman McInnes    Input Parameters:
2286db81eaa0SLois Curfman McInnes +  comm - MPI communicator
228711a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
228811a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
228911a5261eSBarry Smith .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
229011a5261eSBarry Smith .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
229111a5261eSBarry Smith -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2292dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
22938965ea79SLois Curfman McInnes 
22948965ea79SLois Curfman McInnes    Output Parameter:
2295477f1c0bSLois Curfman McInnes .  A - the matrix
22968965ea79SLois Curfman McInnes 
2297b259b22eSLois Curfman McInnes    Notes:
229839ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
229939ddd567SLois Curfman McInnes    storage by columns.
230011a5261eSBarry Smith 
230111a5261eSBarry Smith    Although local portions of the matrix are stored in column-major
2302002bc6daSRichard Tran Mills    order, the matrix is partitioned across MPI ranks by row.
23038965ea79SLois Curfman McInnes 
230418f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
230518f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
230611a5261eSBarry Smith    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
230718f449edSLois Curfman McInnes 
23088965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
23098965ea79SLois Curfman McInnes    (possibly both).
23108965ea79SLois Curfman McInnes 
2311027ccd11SLois Curfman McInnes    Level: intermediate
2312027ccd11SLois Curfman McInnes 
231311a5261eSBarry Smith .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
23148965ea79SLois Curfman McInnes @*/
23159371c9d4SSatish Balay PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
23163a40ed3dSBarry Smith   PetscFunctionBegin;
23179566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23189566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23196f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSE));
23209566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(*A, data));
23216f08ad05SPierre Jolivet   PetscCall(MatMPIDenseSetPreallocation(*A, data));
23223a40ed3dSBarry Smith   PetscFunctionReturn(0);
23238965ea79SLois Curfman McInnes }
23248965ea79SLois Curfman McInnes 
2325637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2326637a0070SStefano Zampini /*@C
232711a5261eSBarry Smith    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
2328637a0070SStefano Zampini 
2329637a0070SStefano Zampini    Collective
2330637a0070SStefano Zampini 
2331637a0070SStefano Zampini    Input Parameters:
2332637a0070SStefano Zampini +  comm - MPI communicator
233311a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
233411a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
233511a5261eSBarry Smith .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
233611a5261eSBarry Smith .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2337637a0070SStefano Zampini -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2338637a0070SStefano Zampini    to control matrix memory allocation.
2339637a0070SStefano Zampini 
2340637a0070SStefano Zampini    Output Parameter:
2341637a0070SStefano Zampini .  A - the matrix
2342637a0070SStefano Zampini 
2343637a0070SStefano Zampini    Level: intermediate
2344637a0070SStefano Zampini 
234511a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
2346637a0070SStefano Zampini @*/
23479371c9d4SSatish Balay PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
2348637a0070SStefano Zampini   PetscFunctionBegin;
23499566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
2350637a0070SStefano Zampini   PetscValidLogicalCollectiveBool(*A, !!data, 6);
23519566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23526f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSECUDA));
23539566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
23546f08ad05SPierre Jolivet   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
2355637a0070SStefano Zampini   PetscFunctionReturn(0);
2356637a0070SStefano Zampini }
2357637a0070SStefano Zampini #endif
2358637a0070SStefano Zampini 
23599371c9d4SSatish Balay static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
23608965ea79SLois Curfman McInnes   Mat           mat;
23613501a2bdSLois Curfman McInnes   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
23628965ea79SLois Curfman McInnes 
23633a40ed3dSBarry Smith   PetscFunctionBegin;
2364f4259b30SLisandro Dalcin   *newmat = NULL;
23659566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
23669566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
23679566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
2368834f8fabSBarry Smith   a = (Mat_MPIDense *)mat->data;
23695aa7edbeSHong Zhang 
2370d5f3da31SBarry Smith   mat->factortype   = A->factortype;
2371c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
2372273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
23738965ea79SLois Curfman McInnes 
2374e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
23753782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
2376e04c1aa4SHong Zhang 
23779566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
23789566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
23798965ea79SLois Curfman McInnes 
23809566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
238101b82886SBarry Smith 
23828965ea79SLois Curfman McInnes   *newmat = mat;
23833a40ed3dSBarry Smith   PetscFunctionReturn(0);
23848965ea79SLois Curfman McInnes }
23858965ea79SLois Curfman McInnes 
23869371c9d4SSatish Balay PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
238787d5ce66SSatish Balay   PetscBool isbinary;
238887d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
238987d5ce66SSatish Balay   PetscBool ishdf5;
239087d5ce66SSatish Balay #endif
2391eb91f321SVaclav Hapla 
2392eb91f321SVaclav Hapla   PetscFunctionBegin;
2393eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2394eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2395eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
23969566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
23979566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
239887d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
23999566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
240087d5ce66SSatish Balay #endif
2401eb91f321SVaclav Hapla   if (isbinary) {
24029566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2403eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
240487d5ce66SSatish Balay   } else if (ishdf5) {
24059566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2406eb91f321SVaclav Hapla #endif
240798921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2408eb91f321SVaclav Hapla   PetscFunctionReturn(0);
2409eb91f321SVaclav Hapla }
2410eb91f321SVaclav Hapla 
24119371c9d4SSatish Balay static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
24126e4ee0c6SHong Zhang   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
24136e4ee0c6SHong Zhang   Mat           a, b;
241490ace30eSBarry Smith 
24156e4ee0c6SHong Zhang   PetscFunctionBegin;
24166e4ee0c6SHong Zhang   a = matA->A;
24176e4ee0c6SHong Zhang   b = matB->A;
24182cf15c64SPierre Jolivet   PetscCall(MatEqual(a, b, flag));
24192cf15c64SPierre Jolivet   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
24206e4ee0c6SHong Zhang   PetscFunctionReturn(0);
24216e4ee0c6SHong Zhang }
242290ace30eSBarry Smith 
24239371c9d4SSatish Balay PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
24246718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2425baa3c1c6SHong Zhang 
2426baa3c1c6SHong Zhang   PetscFunctionBegin;
24279566063dSJacob Faibussowitsch   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
24289566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&atb->atb));
24299566063dSJacob Faibussowitsch   PetscCall(PetscFree(atb));
2430baa3c1c6SHong Zhang   PetscFunctionReturn(0);
2431baa3c1c6SHong Zhang }
2432baa3c1c6SHong Zhang 
24339371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
24346718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2435cc48ffa7SToby Isaac 
2436cc48ffa7SToby Isaac   PetscFunctionBegin;
24379566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
24389566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
24399566063dSJacob Faibussowitsch   PetscCall(PetscFree(abt));
2440cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2441cc48ffa7SToby Isaac }
2442cc48ffa7SToby Isaac 
24439371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2444baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
24456718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2446cb20be35SHong Zhang   MPI_Comm               comm;
24476718818eSStefano Zampini   PetscMPIInt            size, *recvcounts;
24486718818eSStefano Zampini   PetscScalar           *carray, *sendbuf;
2449637a0070SStefano Zampini   const PetscScalar     *atbarray;
2450a1f9a5e6SStefano Zampini   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2451e68c0b26SHong Zhang   const PetscInt        *ranges;
2452cb20be35SHong Zhang 
2453cb20be35SHong Zhang   PetscFunctionBegin;
24546718818eSStefano Zampini   MatCheckProduct(C, 3);
24555f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
24566718818eSStefano Zampini   atb        = (Mat_TransMatMultDense *)C->product->data;
24576718818eSStefano Zampini   recvcounts = atb->recvcounts;
24586718818eSStefano Zampini   sendbuf    = atb->sendbuf;
24596718818eSStefano Zampini 
24609566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
24619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2462e68c0b26SHong Zhang 
2463c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
24649566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
2465cb20be35SHong Zhang 
24669566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
2467cb20be35SHong Zhang 
2468660d5466SHong Zhang   /* arrange atbarray into sendbuf */
24699566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
24709566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2471637a0070SStefano Zampini   for (proc = 0, k = 0; proc < size; proc++) {
2472baa3c1c6SHong Zhang     for (j = 0; j < cN; j++) {
2473a1f9a5e6SStefano Zampini       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2474cb20be35SHong Zhang     }
2475cb20be35SHong Zhang   }
24769566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2477637a0070SStefano Zampini 
2478c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
24799566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
24809566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
24819566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
24829566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
24839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2484cb20be35SHong Zhang   PetscFunctionReturn(0);
2485cb20be35SHong Zhang }
2486cb20be35SHong Zhang 
24879371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2488cb20be35SHong Zhang   MPI_Comm               comm;
2489baa3c1c6SHong Zhang   PetscMPIInt            size;
2490660d5466SHong Zhang   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2491baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
24926718818eSStefano Zampini   PetscBool              cisdense;
24930acaeb71SStefano Zampini   PetscInt               i;
24940acaeb71SStefano Zampini   const PetscInt        *ranges;
2495cb20be35SHong Zhang 
2496cb20be35SHong Zhang   PetscFunctionBegin;
24978f83a4d9SJacob Faibussowitsch   MatCheckProduct(C, 4);
24985f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
24999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2500cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
250198921bdaSJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2502cb20be35SHong Zhang   }
2503cb20be35SHong Zhang 
25044222ddf1SHong Zhang   /* create matrix product C */
25059566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
25069566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
250748a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
25089566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2509baa3c1c6SHong Zhang 
25104222ddf1SHong Zhang   /* create data structure for reuse C */
25119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
25129566063dSJacob Faibussowitsch   PetscCall(PetscNew(&atb));
25134222ddf1SHong Zhang   cM = C->rmap->N;
25149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
25159566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
25160acaeb71SStefano Zampini   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2517baa3c1c6SHong Zhang 
25186718818eSStefano Zampini   C->product->data    = atb;
25196718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2520cb20be35SHong Zhang   PetscFunctionReturn(0);
2521cb20be35SHong Zhang }
2522cb20be35SHong Zhang 
25239371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2524cc48ffa7SToby Isaac   MPI_Comm               comm;
2525cc48ffa7SToby Isaac   PetscMPIInt            i, size;
2526cc48ffa7SToby Isaac   PetscInt               maxRows, bufsiz;
2527cc48ffa7SToby Isaac   PetscMPIInt            tag;
25284222ddf1SHong Zhang   PetscInt               alg;
2529cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
25304222ddf1SHong Zhang   Mat_Product           *product = C->product;
25314222ddf1SHong Zhang   PetscBool              flg;
2532cc48ffa7SToby Isaac 
2533cc48ffa7SToby Isaac   PetscFunctionBegin;
25346718818eSStefano Zampini   MatCheckProduct(C, 4);
25355f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
25364222ddf1SHong Zhang   /* check local size of A and B */
25375f80ce2aSJacob Faibussowitsch   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2538cc48ffa7SToby Isaac 
25399566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2540637a0070SStefano Zampini   alg = flg ? 0 : 1;
2541cc48ffa7SToby Isaac 
25424222ddf1SHong Zhang   /* setup matrix product C */
25439566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
25449566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATMPIDENSE));
25459566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
25469566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
25474222ddf1SHong Zhang 
25484222ddf1SHong Zhang   /* create data structure for reuse C */
25499566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
25509566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
25519566063dSJacob Faibussowitsch   PetscCall(PetscNew(&abt));
2552cc48ffa7SToby Isaac   abt->tag = tag;
2553faa55883SToby Isaac   abt->alg = alg;
2554faa55883SToby Isaac   switch (alg) {
25554222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2556cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2557cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
25589566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2559faa55883SToby Isaac     break;
25604222ddf1SHong Zhang   default: /* alg: "allgatherv" */
25619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
25629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2563faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2564faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2565faa55883SToby Isaac     break;
2566faa55883SToby Isaac   }
2567cc48ffa7SToby Isaac 
25686718818eSStefano Zampini   C->product->data                = abt;
25696718818eSStefano Zampini   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
25704222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2571cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2572cc48ffa7SToby Isaac }
2573cc48ffa7SToby Isaac 
25749371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
2575cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
25766718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2577cc48ffa7SToby Isaac   MPI_Comm               comm;
2578cc48ffa7SToby Isaac   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2579f4259b30SLisandro Dalcin   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2580cc48ffa7SToby Isaac   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2581cc48ffa7SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2582637a0070SStefano Zampini   const PetscScalar     *av, *bv;
258323fff9afSBarry Smith   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2584cc48ffa7SToby Isaac   MPI_Request            reqs[2];
2585cc48ffa7SToby Isaac   const PetscInt        *ranges;
2586cc48ffa7SToby Isaac 
2587cc48ffa7SToby Isaac   PetscFunctionBegin;
25886718818eSStefano Zampini   MatCheckProduct(C, 3);
25895f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
25906718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
25919566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
25929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
25939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
25949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
25959566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
25969566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
25979566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
25989566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
25999566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &i));
26009566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &blda));
26019566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
26029566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
26039566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(B, &ranges));
2604cc48ffa7SToby Isaac   bn = B->rmap->n;
2605637a0070SStefano Zampini   if (blda == bn) {
2606637a0070SStefano Zampini     sendbuf = (PetscScalar *)bv;
2607cc48ffa7SToby Isaac   } else {
2608cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2609cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2610ad540459SPierre Jolivet       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2611cc48ffa7SToby Isaac     }
2612cc48ffa7SToby Isaac   }
2613cc48ffa7SToby Isaac   if (size > 1) {
2614cc48ffa7SToby Isaac     sendto   = (rank + size - 1) % size;
2615cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2616cc48ffa7SToby Isaac   } else {
2617cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2618cc48ffa7SToby Isaac   }
26199566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
26209566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2621cc48ffa7SToby Isaac   recvisfrom = rank;
2622cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2623cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2624cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2625cc48ffa7SToby Isaac     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2626cc48ffa7SToby Isaac 
2627cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2628cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2629cc48ffa7SToby Isaac       sendsiz = cK * bn;
2630cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2631cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
26329566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
26339566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2634cc48ffa7SToby Isaac     }
2635cc48ffa7SToby Isaac 
2636cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
26379566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2638792fecdfSBarry Smith     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2639cc48ffa7SToby Isaac 
2640cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2641cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
26429566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2643cc48ffa7SToby Isaac     }
2644cc48ffa7SToby Isaac     bn         = nextbn;
2645cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2646cc48ffa7SToby Isaac     sendbuf    = recvbuf;
2647cc48ffa7SToby Isaac   }
26489566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
26499566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
26509566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
26519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
26529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2653cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2654cc48ffa7SToby Isaac }
2655cc48ffa7SToby Isaac 
26569371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2657faa55883SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
26586718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2659faa55883SToby Isaac   MPI_Comm               comm;
2660637a0070SStefano Zampini   PetscMPIInt            size;
2661637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2662637a0070SStefano Zampini   const PetscScalar     *av, *bv;
2663637a0070SStefano Zampini   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2664faa55883SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2665637a0070SStefano Zampini   PetscBLASInt           cm, cn, ck, alda, clda;
2666faa55883SToby Isaac 
2667faa55883SToby Isaac   PetscFunctionBegin;
26686718818eSStefano Zampini   MatCheckProduct(C, 3);
26695f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
26706718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
26719566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
26729566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
26739566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
26749566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
26759566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
26769566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
26779566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
26789566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &blda));
26799566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
26809566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
2681faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2682faa55883SToby Isaac   bn      = B->rmap->n;
2683faa55883SToby Isaac   sendbuf = abt->buf[0];
2684faa55883SToby Isaac   recvbuf = abt->buf[1];
2685faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2686ad540459SPierre Jolivet     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2687faa55883SToby Isaac   }
26889566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
26899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
26909566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
26919566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
26929566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2693792fecdfSBarry Smith   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
26949566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
26959566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
26969566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
26979566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
26989566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2699faa55883SToby Isaac   PetscFunctionReturn(0);
2700faa55883SToby Isaac }
2701faa55883SToby Isaac 
27029371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
27036718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2704faa55883SToby Isaac 
2705faa55883SToby Isaac   PetscFunctionBegin;
27066718818eSStefano Zampini   MatCheckProduct(C, 3);
27075f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
27086718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
2709faa55883SToby Isaac   switch (abt->alg) {
27109371c9d4SSatish Balay   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
27119371c9d4SSatish Balay   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2712faa55883SToby Isaac   }
2713faa55883SToby Isaac   PetscFunctionReturn(0);
2714faa55883SToby Isaac }
2715faa55883SToby Isaac 
27169371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
27176718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2718320f2790SHong Zhang 
2719320f2790SHong Zhang   PetscFunctionBegin;
27209566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ce));
27219566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ae));
27229566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Be));
27239566063dSJacob Faibussowitsch   PetscCall(PetscFree(ab));
2724320f2790SHong Zhang   PetscFunctionReturn(0);
2725320f2790SHong Zhang }
2726320f2790SHong Zhang 
27275242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
27289371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
27296718818eSStefano Zampini   Mat_MatMultDense *ab;
2730320f2790SHong Zhang 
2731320f2790SHong Zhang   PetscFunctionBegin;
27326718818eSStefano Zampini   MatCheckProduct(C, 3);
27335f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
27346718818eSStefano Zampini   ab = (Mat_MatMultDense *)C->product->data;
27359566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
27369566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
27379566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
27389566063dSJacob Faibussowitsch   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2739320f2790SHong Zhang   PetscFunctionReturn(0);
2740320f2790SHong Zhang }
2741320f2790SHong Zhang 
27429371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2743320f2790SHong Zhang   Mat               Ae, Be, Ce;
2744320f2790SHong Zhang   Mat_MatMultDense *ab;
2745320f2790SHong Zhang 
2746320f2790SHong Zhang   PetscFunctionBegin;
27476718818eSStefano Zampini   MatCheckProduct(C, 4);
27485f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
27494222ddf1SHong Zhang   /* check local size of A and B */
2750320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
275198921bdaSJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2752320f2790SHong Zhang   }
2753320f2790SHong Zhang 
27544222ddf1SHong Zhang   /* create elemental matrices Ae and Be */
27559566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
27569566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
27579566063dSJacob Faibussowitsch   PetscCall(MatSetType(Ae, MATELEMENTAL));
27589566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Ae));
27599566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2760320f2790SHong Zhang 
27619566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
27629566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
27639566063dSJacob Faibussowitsch   PetscCall(MatSetType(Be, MATELEMENTAL));
27649566063dSJacob Faibussowitsch   PetscCall(MatSetUp(Be));
27659566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2766320f2790SHong Zhang 
27674222ddf1SHong Zhang   /* compute symbolic Ce = Ae*Be */
27689566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
27699566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
27704222ddf1SHong Zhang 
27714222ddf1SHong Zhang   /* setup C */
27729566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
27739566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATDENSE));
27749566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2775320f2790SHong Zhang 
2776320f2790SHong Zhang   /* create data structure for reuse Cdense */
27779566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ab));
2778320f2790SHong Zhang   ab->Ae = Ae;
2779320f2790SHong Zhang   ab->Be = Be;
2780320f2790SHong Zhang   ab->Ce = Ce;
27816718818eSStefano Zampini 
27826718818eSStefano Zampini   C->product->data       = ab;
27836718818eSStefano Zampini   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
27844222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2785320f2790SHong Zhang   PetscFunctionReturn(0);
2786320f2790SHong Zhang }
27874222ddf1SHong Zhang #endif
27884222ddf1SHong Zhang /* ----------------------------------------------- */
27894222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
27909371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2791320f2790SHong Zhang   PetscFunctionBegin;
27924222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
27934222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
2794320f2790SHong Zhang   PetscFunctionReturn(0);
2795320f2790SHong Zhang }
27965242a7b1SHong Zhang #endif
279786aefd0dSHong Zhang 
27989371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
27994222ddf1SHong Zhang   Mat_Product *product = C->product;
28004222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
28014222ddf1SHong Zhang 
28024222ddf1SHong Zhang   PetscFunctionBegin;
28034222ddf1SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
280498921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
28056718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
28066718818eSStefano Zampini   C->ops->productsymbolic          = MatProductSymbolic_AtB;
28074222ddf1SHong Zhang   PetscFunctionReturn(0);
28084222ddf1SHong Zhang }
28094222ddf1SHong Zhang 
28109371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
28114222ddf1SHong Zhang   Mat_Product *product     = C->product;
28124222ddf1SHong Zhang   const char  *algTypes[2] = {"allgatherv", "cyclic"};
28134222ddf1SHong Zhang   PetscInt     alg, nalg = 2;
28144222ddf1SHong Zhang   PetscBool    flg = PETSC_FALSE;
28154222ddf1SHong Zhang 
28164222ddf1SHong Zhang   PetscFunctionBegin;
28174222ddf1SHong Zhang   /* Set default algorithm */
28184222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
28199566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "default", &flg));
282048a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
28214222ddf1SHong Zhang 
28224222ddf1SHong Zhang   /* Get runtime option */
28234222ddf1SHong Zhang   if (product->api_user) {
2824d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
28259566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2826d0609cedSBarry Smith     PetscOptionsEnd();
28274222ddf1SHong Zhang   } else {
2828d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
28299566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2830d0609cedSBarry Smith     PetscOptionsEnd();
28314222ddf1SHong Zhang   }
283248a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
28334222ddf1SHong Zhang 
28344222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
28354222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
28364222ddf1SHong Zhang   PetscFunctionReturn(0);
28374222ddf1SHong Zhang }
28384222ddf1SHong Zhang 
28399371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
28404222ddf1SHong Zhang   Mat_Product *product = C->product;
28414222ddf1SHong Zhang 
28424222ddf1SHong Zhang   PetscFunctionBegin;
28434222ddf1SHong Zhang   switch (product->type) {
28444222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
28459371c9d4SSatish Balay   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
28464222ddf1SHong Zhang #endif
28479371c9d4SSatish Balay   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
28489371c9d4SSatish Balay   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
28499371c9d4SSatish Balay   default: break;
28504222ddf1SHong Zhang   }
28514222ddf1SHong Zhang   PetscFunctionReturn(0);
28524222ddf1SHong Zhang }
2853