1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 8baa3c1c6SHong Zhang #include <petscblaslapack.h> 98965ea79SLois Curfman McInnes 10ab92ecdeSBarry Smith /*@ 11*11a5261eSBarry Smith MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 12ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 13ab92ecdeSBarry Smith 14ab92ecdeSBarry Smith Input Parameter: 15*11a5261eSBarry Smith . A - the sequential or MPI dense matrix 16ab92ecdeSBarry Smith 17ab92ecdeSBarry Smith Output Parameter: 18ab92ecdeSBarry Smith . B - the inner matrix 19ab92ecdeSBarry Smith 208e6c10adSSatish Balay Level: intermediate 218e6c10adSSatish Balay 22*11a5261eSBarry Smith .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 23ab92ecdeSBarry Smith @*/ 249371c9d4SSatish Balay PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) { 25ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 26ace3abfcSBarry Smith PetscBool flg; 27ab92ecdeSBarry Smith 28ab92ecdeSBarry Smith PetscFunctionBegin; 29d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 30d5ea218eSStefano Zampini PetscValidPointer(B, 2); 319566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 322205254eSKarl Rupp if (flg) *B = mat->A; 33f140bc17SStefano Zampini else { 349566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 3528b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 36f140bc17SStefano Zampini *B = A; 37f140bc17SStefano Zampini } 38ab92ecdeSBarry Smith PetscFunctionReturn(0); 39ab92ecdeSBarry Smith } 40ab92ecdeSBarry Smith 419371c9d4SSatish Balay PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) { 42a76f77c3SStefano Zampini Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 43a76f77c3SStefano Zampini Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 44a76f77c3SStefano Zampini 45a76f77c3SStefano Zampini PetscFunctionBegin; 469566063dSJacob Faibussowitsch PetscCall(MatCopy(Amat->A, Bmat->A, s)); 47a76f77c3SStefano Zampini PetscFunctionReturn(0); 48a76f77c3SStefano Zampini } 49a76f77c3SStefano Zampini 509371c9d4SSatish Balay PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) { 512f605a99SJose E. Roman Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 522f605a99SJose E. Roman PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 532f605a99SJose E. Roman PetscScalar *v; 542f605a99SJose E. Roman 552f605a99SJose E. Roman PetscFunctionBegin; 562f605a99SJose E. Roman PetscCall(MatDenseGetArray(mat->A, &v)); 572f605a99SJose E. Roman PetscCall(MatDenseGetLDA(mat->A, &lda)); 582f605a99SJose E. Roman rend2 = PetscMin(rend, A->cmap->N); 592f605a99SJose E. Roman if (rend2 > rstart) { 602f605a99SJose E. Roman for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 612f605a99SJose E. Roman PetscCall(PetscLogFlops(rend2 - rstart)); 622f605a99SJose E. Roman } 632f605a99SJose E. Roman PetscCall(MatDenseRestoreArray(mat->A, &v)); 642f605a99SJose E. Roman PetscFunctionReturn(0); 652f605a99SJose E. Roman } 662f605a99SJose E. Roman 679371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 68ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 69d0f46423SBarry Smith PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 70ba8c8a56SBarry Smith 71ba8c8a56SBarry Smith PetscFunctionBegin; 72aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 73ba8c8a56SBarry Smith lrow = row - rstart; 749566063dSJacob Faibussowitsch PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 75ba8c8a56SBarry Smith PetscFunctionReturn(0); 76ba8c8a56SBarry Smith } 77ba8c8a56SBarry Smith 789371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 79637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 80637a0070SStefano Zampini PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 81ba8c8a56SBarry Smith 82ba8c8a56SBarry Smith PetscFunctionBegin; 83aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 84637a0070SStefano Zampini lrow = row - rstart; 859566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 86ba8c8a56SBarry Smith PetscFunctionReturn(0); 87ba8c8a56SBarry Smith } 88ba8c8a56SBarry Smith 899371c9d4SSatish Balay PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) { 900de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 91d0f46423SBarry Smith PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 9287828ca2SBarry Smith PetscScalar *array; 930de54da6SSatish Balay MPI_Comm comm; 944b6ae80fSStefano Zampini PetscBool flg; 9511bd1e4dSLisandro Dalcin Mat B; 960de54da6SSatish Balay 970de54da6SSatish Balay PetscFunctionBegin; 989566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &flg)); 9928b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 1009566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 101616b8fbbSStefano Zampini if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 1024b6ae80fSStefano Zampini 1039566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 10428b400f6SJacob Faibussowitsch PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 1059566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 1069566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &B)); 1079566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, m, m, m, m)); 1089566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 1099566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 1109566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 1119566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 1129566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 11311bd1e4dSLisandro Dalcin *a = B; 1149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1152205254eSKarl Rupp } else *a = B; 1160de54da6SSatish Balay PetscFunctionReturn(0); 1170de54da6SSatish Balay } 1180de54da6SSatish Balay 1199371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) { 12039b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 121d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 122ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1238965ea79SLois Curfman McInnes 1243a40ed3dSBarry Smith PetscFunctionBegin; 1258965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 1265ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 12708401ef6SPierre Jolivet PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 1288965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1298965ea79SLois Curfman McInnes row = idxm[i] - rstart; 13039b7565bSBarry Smith if (roworiented) { 1319566063dSJacob Faibussowitsch PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 1323a40ed3dSBarry Smith } else { 1338965ea79SLois Curfman McInnes for (j = 0; j < n; j++) { 1345ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 13508401ef6SPierre Jolivet PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 1369566063dSJacob Faibussowitsch PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 13739b7565bSBarry Smith } 1388965ea79SLois Curfman McInnes } 1392205254eSKarl Rupp } else if (!A->donotstash) { 1405080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 14139b7565bSBarry Smith if (roworiented) { 1429566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 143d36fbae8SSatish Balay } else { 1449566063dSJacob Faibussowitsch PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 14539b7565bSBarry Smith } 146b49de8d1SLois Curfman McInnes } 147b49de8d1SLois Curfman McInnes } 1483a40ed3dSBarry Smith PetscFunctionReturn(0); 149b49de8d1SLois Curfman McInnes } 150b49de8d1SLois Curfman McInnes 1519371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 152b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 153d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 154b49de8d1SLois Curfman McInnes 1553a40ed3dSBarry Smith PetscFunctionBegin; 156b49de8d1SLois Curfman McInnes for (i = 0; i < m; i++) { 15754c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 15854c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 159b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 160b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 161b49de8d1SLois Curfman McInnes for (j = 0; j < n; j++) { 16254c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 16354c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 1649566063dSJacob Faibussowitsch PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 165b49de8d1SLois Curfman McInnes } 166e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 1678965ea79SLois Curfman McInnes } 1683a40ed3dSBarry Smith PetscFunctionReturn(0); 1698965ea79SLois Curfman McInnes } 1708965ea79SLois Curfman McInnes 1719371c9d4SSatish Balay static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) { 17249a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 17349a6ff4bSBarry Smith 17449a6ff4bSBarry Smith PetscFunctionBegin; 1759566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, lda)); 17649a6ff4bSBarry Smith PetscFunctionReturn(0); 17749a6ff4bSBarry Smith } 17849a6ff4bSBarry Smith 1799371c9d4SSatish Balay static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) { 180ad16ce7aSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 18117359960SJose E. Roman PetscBool iscuda; 182ad16ce7aSStefano Zampini 183ad16ce7aSStefano Zampini PetscFunctionBegin; 18417359960SJose E. Roman if (!a->A) { 18528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1869566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 1879566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1889566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1899566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A)); 1909566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1919566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1929566063dSJacob Faibussowitsch PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 19317359960SJose E. Roman } 1949566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(a->A, lda)); 195ad16ce7aSStefano Zampini PetscFunctionReturn(0); 196ad16ce7aSStefano Zampini } 197ad16ce7aSStefano Zampini 1989371c9d4SSatish Balay static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) { 199ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *)A->data; 200ff14e315SSatish Balay 2013a40ed3dSBarry Smith PetscFunctionBegin; 20228b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2039566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, array)); 2043a40ed3dSBarry Smith PetscFunctionReturn(0); 205ff14e315SSatish Balay } 206ff14e315SSatish Balay 2079371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) { 2088572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2098572280aSBarry Smith 2108572280aSBarry Smith PetscFunctionBegin; 21128b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2129566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, array)); 2138572280aSBarry Smith PetscFunctionReturn(0); 2148572280aSBarry Smith } 2158572280aSBarry Smith 2169371c9d4SSatish Balay static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) { 2176947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2186947451fSStefano Zampini 2196947451fSStefano Zampini PetscFunctionBegin; 22028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2219566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, array)); 2226947451fSStefano Zampini PetscFunctionReturn(0); 2236947451fSStefano Zampini } 2246947451fSStefano Zampini 2259371c9d4SSatish Balay static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) { 226d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 227d3042a70SBarry Smith 228d3042a70SBarry Smith PetscFunctionBegin; 22928b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 23028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2319566063dSJacob Faibussowitsch PetscCall(MatDensePlaceArray(a->A, array)); 232d3042a70SBarry Smith PetscFunctionReturn(0); 233d3042a70SBarry Smith } 234d3042a70SBarry Smith 2359371c9d4SSatish Balay static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) { 236d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 237d3042a70SBarry Smith 238d3042a70SBarry Smith PetscFunctionBegin; 23928b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 24028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2419566063dSJacob Faibussowitsch PetscCall(MatDenseResetArray(a->A)); 242d3042a70SBarry Smith PetscFunctionReturn(0); 243d3042a70SBarry Smith } 244d3042a70SBarry Smith 2459371c9d4SSatish Balay static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) { 246d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 247d5ea218eSStefano Zampini 248d5ea218eSStefano Zampini PetscFunctionBegin; 24928b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 25028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2519566063dSJacob Faibussowitsch PetscCall(MatDenseReplaceArray(a->A, array)); 252d5ea218eSStefano Zampini PetscFunctionReturn(0); 253d5ea218eSStefano Zampini } 254d5ea218eSStefano Zampini 2559371c9d4SSatish Balay static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 256ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 257637a0070SStefano Zampini PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 2585d0c19d7SBarry Smith const PetscInt *irow, *icol; 259637a0070SStefano Zampini const PetscScalar *v; 260637a0070SStefano Zampini PetscScalar *bv; 261ca3fa75bSLois Curfman McInnes Mat newmat; 2624aa3045dSJed Brown IS iscol_local; 26342a884f0SBarry Smith MPI_Comm comm_is, comm_mat; 264ca3fa75bSLois Curfman McInnes 265ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 2669566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 2679566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 26808401ef6SPierre Jolivet PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 26942a884f0SBarry Smith 2709566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 2719566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &irow)); 2729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &icol)); 2739566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &nrows)); 2749566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &ncols)); 2759566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 276ca3fa75bSLois Curfman McInnes 277ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2787eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2797eba5e9cSLois Curfman McInnes original matrix! */ 280ca3fa75bSLois Curfman McInnes 2819566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 2829566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 283ca3fa75bSLois Curfman McInnes 284ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 285ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 286e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2877eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 288ca3fa75bSLois Curfman McInnes newmat = *B; 289ca3fa75bSLois Curfman McInnes } else { 290ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 2919566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 2929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 2939566063dSJacob Faibussowitsch PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 2949566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 295ca3fa75bSLois Curfman McInnes } 296ca3fa75bSLois Curfman McInnes 297ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 298ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense *)newmat->data; 2999566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(newmatd->A, &bv)); 3009566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mat->A, &v)); 3019566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mat->A, &lda)); 3024aa3045dSJed Brown for (i = 0; i < Ncols; i++) { 303637a0070SStefano Zampini const PetscScalar *av = v + lda * icol[i]; 304ad540459SPierre Jolivet for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 305ca3fa75bSLois Curfman McInnes } 3069566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 3079566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 308ca3fa75bSLois Curfman McInnes 309ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 3109566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 3119566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 312ca3fa75bSLois Curfman McInnes 313ca3fa75bSLois Curfman McInnes /* Free work space */ 3149566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &irow)); 3159566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &icol)); 3169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 317ca3fa75bSLois Curfman McInnes *B = newmat; 318ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 319ca3fa75bSLois Curfman McInnes } 320ca3fa75bSLois Curfman McInnes 3219371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) { 32273a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 32373a71a0fSBarry Smith 3243a40ed3dSBarry Smith PetscFunctionBegin; 3259566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, array)); 3263a40ed3dSBarry Smith PetscFunctionReturn(0); 327ff14e315SSatish Balay } 328ff14e315SSatish Balay 3299371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) { 3308572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3318572280aSBarry Smith 3328572280aSBarry Smith PetscFunctionBegin; 3339566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, array)); 3348572280aSBarry Smith PetscFunctionReturn(0); 3358572280aSBarry Smith } 3368572280aSBarry Smith 3379371c9d4SSatish Balay PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) { 3386947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3396947451fSStefano Zampini 3406947451fSStefano Zampini PetscFunctionBegin; 3419566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 3426947451fSStefano Zampini PetscFunctionReturn(0); 3436947451fSStefano Zampini } 3446947451fSStefano Zampini 3459371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) { 34639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 34713f74950SBarry Smith PetscInt nstash, reallocs; 3488965ea79SLois Curfman McInnes 3493a40ed3dSBarry Smith PetscFunctionBegin; 3504067683fSStefano Zampini if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 3518965ea79SLois Curfman McInnes 3529566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 3539566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 3549566063dSJacob Faibussowitsch PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 3553a40ed3dSBarry Smith PetscFunctionReturn(0); 3568965ea79SLois Curfman McInnes } 3578965ea79SLois Curfman McInnes 3589371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) { 35939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 36013f74950SBarry Smith PetscInt i, *row, *col, flg, j, rstart, ncols; 36113f74950SBarry Smith PetscMPIInt n; 36287828ca2SBarry Smith PetscScalar *val; 3638965ea79SLois Curfman McInnes 3643a40ed3dSBarry Smith PetscFunctionBegin; 365910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 3668965ea79SLois Curfman McInnes /* wait on receives */ 3677ef1d9bdSSatish Balay while (1) { 3689566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 3697ef1d9bdSSatish Balay if (!flg) break; 3708965ea79SLois Curfman McInnes 3717ef1d9bdSSatish Balay for (i = 0; i < n;) { 3727ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3732205254eSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 3742205254eSKarl Rupp if (row[j] != rstart) break; 3752205254eSKarl Rupp } 3767ef1d9bdSSatish Balay if (j < n) ncols = j - i; 3777ef1d9bdSSatish Balay else ncols = n - i; 3787ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3799566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 3807ef1d9bdSSatish Balay i = j; 3818965ea79SLois Curfman McInnes } 3827ef1d9bdSSatish Balay } 3839566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 384910cf402Sprj- } 3858965ea79SLois Curfman McInnes 3869566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mdn->A, mode)); 3879566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mdn->A, mode)); 3883a40ed3dSBarry Smith PetscFunctionReturn(0); 3898965ea79SLois Curfman McInnes } 3908965ea79SLois Curfman McInnes 3919371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIDense(Mat A) { 39239ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 3933a40ed3dSBarry Smith 3943a40ed3dSBarry Smith PetscFunctionBegin; 3959566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 3963a40ed3dSBarry Smith PetscFunctionReturn(0); 3978965ea79SLois Curfman McInnes } 3988965ea79SLois Curfman McInnes 3999371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 40039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 401637a0070SStefano Zampini PetscInt i, len, *lrows; 402637a0070SStefano Zampini 403637a0070SStefano Zampini PetscFunctionBegin; 404637a0070SStefano Zampini /* get locally owned rows */ 4059566063dSJacob Faibussowitsch PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 406637a0070SStefano Zampini /* fix right hand side if needed */ 407637a0070SStefano Zampini if (x && b) { 40897b48c8fSBarry Smith const PetscScalar *xx; 40997b48c8fSBarry Smith PetscScalar *bb; 4108965ea79SLois Curfman McInnes 4119566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 4129566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(b, &bb)); 413637a0070SStefano Zampini for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 4149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 4159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(b, &bb)); 41697b48c8fSBarry Smith } 4179566063dSJacob Faibussowitsch PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 418e2eb51b1SBarry Smith if (diag != 0.0) { 419637a0070SStefano Zampini Vec d; 420b9679d65SBarry Smith 4219566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &d)); 4229566063dSJacob Faibussowitsch PetscCall(VecSet(d, diag)); 4239566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 4249566063dSJacob Faibussowitsch PetscCall(VecDestroy(&d)); 425b9679d65SBarry Smith } 4269566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 4273a40ed3dSBarry Smith PetscFunctionReturn(0); 4288965ea79SLois Curfman McInnes } 4298965ea79SLois Curfman McInnes 430cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 431cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 432cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 433cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 434cc2e6a90SBarry Smith 4359371c9d4SSatish Balay PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) { 43639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 437637a0070SStefano Zampini const PetscScalar *ax; 438637a0070SStefano Zampini PetscScalar *ay; 439d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 440c456f294SBarry Smith 4413a40ed3dSBarry Smith PetscFunctionBegin; 4426f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 4439566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 4449566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 4459566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 4469566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 4479566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 4489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 4499566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 4503a40ed3dSBarry Smith PetscFunctionReturn(0); 4518965ea79SLois Curfman McInnes } 4528965ea79SLois Curfman McInnes 4539371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) { 45439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 455637a0070SStefano Zampini const PetscScalar *ax; 456637a0070SStefano Zampini PetscScalar *ay; 457d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 458c456f294SBarry Smith 4593a40ed3dSBarry Smith PetscFunctionBegin; 4606f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 4619566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 4629566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 4639566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 4649566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 4659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 4669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 4679566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 4683a40ed3dSBarry Smith PetscFunctionReturn(0); 4698965ea79SLois Curfman McInnes } 4708965ea79SLois Curfman McInnes 4719371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) { 472096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 473637a0070SStefano Zampini const PetscScalar *ax; 474637a0070SStefano Zampini PetscScalar *ay; 475d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 476096963f5SLois Curfman McInnes 4773a40ed3dSBarry Smith PetscFunctionBegin; 4786f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 4799566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 4809566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 4819566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 4829566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 4839566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 4849566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 4859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 4869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 4873a40ed3dSBarry Smith PetscFunctionReturn(0); 488096963f5SLois Curfman McInnes } 489096963f5SLois Curfman McInnes 4909371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) { 491096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 492637a0070SStefano Zampini const PetscScalar *ax; 493637a0070SStefano Zampini PetscScalar *ay; 494d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 495096963f5SLois Curfman McInnes 4963a40ed3dSBarry Smith PetscFunctionBegin; 4976f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 4989566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 4999566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 5009566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 5019566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 5029566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 5039566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 5049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 5059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 5063a40ed3dSBarry Smith PetscFunctionReturn(0); 507096963f5SLois Curfman McInnes } 508096963f5SLois Curfman McInnes 5099371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) { 51039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 511637a0070SStefano Zampini PetscInt lda, len, i, n, m = A->rmap->n, radd; 51287828ca2SBarry Smith PetscScalar *x, zero = 0.0; 513637a0070SStefano Zampini const PetscScalar *av; 514ed3cc1f0SBarry Smith 5153a40ed3dSBarry Smith PetscFunctionBegin; 5169566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero)); 5179566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 5189566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n)); 51908401ef6SPierre Jolivet PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 520d0f46423SBarry Smith len = PetscMin(a->A->rmap->n, a->A->cmap->n); 521d0f46423SBarry Smith radd = A->rmap->rstart * m; 5229566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 5239566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 524ad540459SPierre Jolivet for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 5259566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 5269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 5273a40ed3dSBarry Smith PetscFunctionReturn(0); 5288965ea79SLois Curfman McInnes } 5298965ea79SLois Curfman McInnes 5309371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIDense(Mat mat) { 5313501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 532ed3cc1f0SBarry Smith 5333a40ed3dSBarry Smith PetscFunctionBegin; 534aa482453SBarry Smith #if defined(PETSC_USE_LOG) 535c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 5368965ea79SLois Curfman McInnes #endif 5379566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 53828b400f6SJacob Faibussowitsch PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 53928b400f6SJacob Faibussowitsch PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 5409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->A)); 5419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->lvec)); 5429566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&mdn->Mvctx)); 5439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->cvec)); 5449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->cmat)); 54501b82886SBarry Smith 5469566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 5479566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 5488baccfbdSHong Zhang 5499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 5509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 5519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 5529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 5539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 5549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 5559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 5569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 5579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 5589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 5599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 5609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 5619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 5628baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 5648baccfbdSHong Zhang #endif 565d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 5669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 567d24d4204SJose E. Roman #endif 5689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 5699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 5709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 5716718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 5729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 5739566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 5742e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 5752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 5762e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 5772e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 5782e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 5792e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 5802e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 5812e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 5822e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 5832e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 5842e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 5852e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 5862e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 5872e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 5882e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 5896718818eSStefano Zampini #endif 5909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 5919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 5929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 5939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 5949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 5959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 5969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 5979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 5989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 5999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 600f8103e6bSStefano Zampini 601f8103e6bSStefano Zampini PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 6023a40ed3dSBarry Smith PetscFunctionReturn(0); 6038965ea79SLois Curfman McInnes } 60439ddd567SLois Curfman McInnes 60552c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 60652c5f739Sprj- 6079804daf3SBarry Smith #include <petscdraw.h> 6089371c9d4SSatish Balay static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 60939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 610616b8fbbSStefano Zampini PetscMPIInt rank; 61119fd82e9SBarry Smith PetscViewerType vtype; 612ace3abfcSBarry Smith PetscBool iascii, isdraw; 613b0a32e0cSBarry Smith PetscViewer sviewer; 614f3ef73ceSBarry Smith PetscViewerFormat format; 6158965ea79SLois Curfman McInnes 6163a40ed3dSBarry Smith PetscFunctionBegin; 6179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 6189566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 6199566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 62032077d6dSBarry Smith if (iascii) { 6219566063dSJacob Faibussowitsch PetscCall(PetscViewerGetType(viewer, &vtype)); 6229566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 623456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6244e220ebcSLois Curfman McInnes MatInfo info; 6259566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 6269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 6279371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 6289371c9d4SSatish Balay (PetscInt)info.memory)); 6299566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 6309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 6316f08ad05SPierre Jolivet if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 6323a40ed3dSBarry Smith PetscFunctionReturn(0); 633fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 6343a40ed3dSBarry Smith PetscFunctionReturn(0); 6358965ea79SLois Curfman McInnes } 636f1af5d2fSBarry Smith } else if (isdraw) { 637b0a32e0cSBarry Smith PetscDraw draw; 638ace3abfcSBarry Smith PetscBool isnull; 639f1af5d2fSBarry Smith 6409566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 6419566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 642f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 643f1af5d2fSBarry Smith } 64477ed5343SBarry Smith 6457da1fb6eSBarry Smith { 6468965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6478965ea79SLois Curfman McInnes Mat A; 648d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 649ba8c8a56SBarry Smith PetscInt *cols; 650ba8c8a56SBarry Smith PetscScalar *vals; 6518965ea79SLois Curfman McInnes 6529566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 653dd400576SPatrick Sanan if (rank == 0) { 6549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 6553a40ed3dSBarry Smith } else { 6569566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 6578965ea79SLois Curfman McInnes } 6587adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 6599566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPIDENSE)); 6609566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 6619566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A)); 6628965ea79SLois Curfman McInnes 66339ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 66439ddd567SLois Curfman McInnes but it's quick for now */ 66551022da4SBarry Smith A->insertmode = INSERT_VALUES; 6662205254eSKarl Rupp 6672205254eSKarl Rupp row = mat->rmap->rstart; 6682205254eSKarl Rupp m = mdn->A->rmap->n; 6698965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 6709566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 6719566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 6729566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 67339ddd567SLois Curfman McInnes row++; 6748965ea79SLois Curfman McInnes } 6758965ea79SLois Curfman McInnes 6769566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 6779566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 6789566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 679dd400576SPatrick Sanan if (rank == 0) { 6809566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 6819566063dSJacob Faibussowitsch PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 6828965ea79SLois Curfman McInnes } 6839566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 6849566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 6859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 6868965ea79SLois Curfman McInnes } 6873a40ed3dSBarry Smith PetscFunctionReturn(0); 6888965ea79SLois Curfman McInnes } 6898965ea79SLois Curfman McInnes 6909371c9d4SSatish Balay PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) { 691ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw, issocket; 6928965ea79SLois Curfman McInnes 693433994e6SBarry Smith PetscFunctionBegin; 6949566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 6959566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 6969566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 6979566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 6980f5bd95cSBarry Smith 69932077d6dSBarry Smith if (iascii || issocket || isdraw) { 7009566063dSJacob Faibussowitsch PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 7011baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 7023a40ed3dSBarry Smith PetscFunctionReturn(0); 7038965ea79SLois Curfman McInnes } 7048965ea79SLois Curfman McInnes 7059371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) { 7063501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 7073501a2bdSLois Curfman McInnes Mat mdn = mat->A; 7083966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 7098965ea79SLois Curfman McInnes 7103a40ed3dSBarry Smith PetscFunctionBegin; 7114e220ebcSLois Curfman McInnes info->block_size = 1.0; 7122205254eSKarl Rupp 7139566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 7142205254eSKarl Rupp 7159371c9d4SSatish Balay isend[0] = info->nz_used; 7169371c9d4SSatish Balay isend[1] = info->nz_allocated; 7179371c9d4SSatish Balay isend[2] = info->nz_unneeded; 7189371c9d4SSatish Balay isend[3] = info->memory; 7199371c9d4SSatish Balay isend[4] = info->mallocs; 7208965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 7214e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7224e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7234e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7244e220ebcSLois Curfman McInnes info->memory = isend[3]; 7254e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7268965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 7271c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 7282205254eSKarl Rupp 7294e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7304e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7314e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7324e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7334e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7348965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 7351c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 7362205254eSKarl Rupp 7374e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7384e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7394e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7404e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7414e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7428965ea79SLois Curfman McInnes } 7434e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7444e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7454e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7463a40ed3dSBarry Smith PetscFunctionReturn(0); 7478965ea79SLois Curfman McInnes } 7488965ea79SLois Curfman McInnes 7499371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) { 75039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 7518965ea79SLois Curfman McInnes 7523a40ed3dSBarry Smith PetscFunctionBegin; 75312c028f9SKris Buschelman switch (op) { 754512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 75512c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 75612c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 75743674050SBarry Smith MatCheckPreallocated(A, 1); 7589566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 75912c028f9SKris Buschelman break; 76012c028f9SKris Buschelman case MAT_ROW_ORIENTED: 76143674050SBarry Smith MatCheckPreallocated(A, 1); 7624e0d8c25SBarry Smith a->roworiented = flg; 7639566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 76412c028f9SKris Buschelman break; 7658c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 76613fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 76712c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 7689371c9d4SSatish Balay case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 7699371c9d4SSatish Balay case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break; 77077e54ba9SKris Buschelman case MAT_SYMMETRIC: 77177e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7729a4540c5SBarry Smith case MAT_HERMITIAN: 7739a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 774b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 775b94d7dedSBarry Smith case MAT_SPD: 776600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 7775d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 778b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 779b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 7809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 78177e54ba9SKris Buschelman break; 7829371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 7833a40ed3dSBarry Smith } 7843a40ed3dSBarry Smith PetscFunctionReturn(0); 7858965ea79SLois Curfman McInnes } 7868965ea79SLois Curfman McInnes 7879371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) { 7885b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 789637a0070SStefano Zampini const PetscScalar *l; 790637a0070SStefano Zampini PetscScalar x, *v, *vv, *r; 791637a0070SStefano Zampini PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 7925b2fa520SLois Curfman McInnes 7935b2fa520SLois Curfman McInnes PetscFunctionBegin; 7949566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(mdn->A, &vv)); 7959566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mdn->A, &lda)); 7969566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &s2, &s3)); 7975b2fa520SLois Curfman McInnes if (ll) { 7989566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll, &s2a)); 79908401ef6SPierre Jolivet PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 8009566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ll, &l)); 8015b2fa520SLois Curfman McInnes for (i = 0; i < m; i++) { 8025b2fa520SLois Curfman McInnes x = l[i]; 803637a0070SStefano Zampini v = vv + i; 8049371c9d4SSatish Balay for (j = 0; j < n; j++) { 8059371c9d4SSatish Balay (*v) *= x; 8069371c9d4SSatish Balay v += lda; 8079371c9d4SSatish Balay } 8085b2fa520SLois Curfman McInnes } 8099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ll, &l)); 8109566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 8115b2fa520SLois Curfman McInnes } 8125b2fa520SLois Curfman McInnes if (rr) { 813637a0070SStefano Zampini const PetscScalar *ar; 814637a0070SStefano Zampini 8159566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &s3a)); 81608401ef6SPierre Jolivet PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 8179566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rr, &ar)); 8186f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 8199566063dSJacob Faibussowitsch PetscCall(VecGetArray(mdn->lvec, &r)); 8209566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 8219566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 8229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rr, &ar)); 8235b2fa520SLois Curfman McInnes for (i = 0; i < n; i++) { 8245b2fa520SLois Curfman McInnes x = r[i]; 825637a0070SStefano Zampini v = vv + i * lda; 8262205254eSKarl Rupp for (j = 0; j < m; j++) (*v++) *= x; 8275b2fa520SLois Curfman McInnes } 8289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mdn->lvec, &r)); 8299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 8305b2fa520SLois Curfman McInnes } 8319566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 8325b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8335b2fa520SLois Curfman McInnes } 8345b2fa520SLois Curfman McInnes 8359371c9d4SSatish Balay PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) { 8363501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 83713f74950SBarry Smith PetscInt i, j; 838616b8fbbSStefano Zampini PetscMPIInt size; 839329f5518SBarry Smith PetscReal sum = 0.0; 840637a0070SStefano Zampini const PetscScalar *av, *v; 8413501a2bdSLois Curfman McInnes 8423a40ed3dSBarry Smith PetscFunctionBegin; 8439566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 844637a0070SStefano Zampini v = av; 8459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 846616b8fbbSStefano Zampini if (size == 1) { 8479566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, nrm)); 8483501a2bdSLois Curfman McInnes } else { 8493501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 850d0f46423SBarry Smith for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 8519371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 8529371c9d4SSatish Balay v++; 8533501a2bdSLois Curfman McInnes } 8541c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 8558f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 8569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 8573a40ed3dSBarry Smith } else if (type == NORM_1) { 858329f5518SBarry Smith PetscReal *tmp, *tmp2; 8599566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 860064f8208SBarry Smith *nrm = 0.0; 861637a0070SStefano Zampini v = av; 862d0f46423SBarry Smith for (j = 0; j < mdn->A->cmap->n; j++) { 863d0f46423SBarry Smith for (i = 0; i < mdn->A->rmap->n; i++) { 8649371c9d4SSatish Balay tmp[j] += PetscAbsScalar(*v); 8659371c9d4SSatish Balay v++; 8663501a2bdSLois Curfman McInnes } 8673501a2bdSLois Curfman McInnes } 8681c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 869d0f46423SBarry Smith for (j = 0; j < A->cmap->N; j++) { 870064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8713501a2bdSLois Curfman McInnes } 8729566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmp, tmp2)); 8739566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 8743a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 875329f5518SBarry Smith PetscReal ntemp; 8769566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, &ntemp)); 8771c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 878ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 8793501a2bdSLois Curfman McInnes } 8809566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 8813a40ed3dSBarry Smith PetscFunctionReturn(0); 8823501a2bdSLois Curfman McInnes } 8833501a2bdSLois Curfman McInnes 8849371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) { 8853501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 8863501a2bdSLois Curfman McInnes Mat B; 887d0f46423SBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 888637a0070SStefano Zampini PetscInt j, i, lda; 88987828ca2SBarry Smith PetscScalar *v; 8903501a2bdSLois Curfman McInnes 8913a40ed3dSBarry Smith PetscFunctionBegin; 8927fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 893cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 8949566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 8959566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 8969566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 8979566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 898637a0070SStefano Zampini } else B = *matout; 8993501a2bdSLois Curfman McInnes 9009371c9d4SSatish Balay m = a->A->rmap->n; 9019371c9d4SSatish Balay n = a->A->cmap->n; 9029566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 9039566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 9049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rwork)); 9053501a2bdSLois Curfman McInnes for (i = 0; i < m; i++) rwork[i] = rstart + i; 9061acff37aSSatish Balay for (j = 0; j < n; j++) { 9079566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 908637a0070SStefano Zampini v += lda; 9093501a2bdSLois Curfman McInnes } 9109566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 9119566063dSJacob Faibussowitsch PetscCall(PetscFree(rwork)); 9129566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 9139566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 914cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 9153501a2bdSLois Curfman McInnes *matout = B; 9163501a2bdSLois Curfman McInnes } else { 9179566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &B)); 9183501a2bdSLois Curfman McInnes } 9193a40ed3dSBarry Smith PetscFunctionReturn(0); 920096963f5SLois Curfman McInnes } 921096963f5SLois Curfman McInnes 9226849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 92352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 9248965ea79SLois Curfman McInnes 9259371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIDense(Mat A) { 926273d9f13SBarry Smith PetscFunctionBegin; 9279566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 9289566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 92948a46eb9SPierre Jolivet if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 930273d9f13SBarry Smith PetscFunctionReturn(0); 931273d9f13SBarry Smith } 932273d9f13SBarry Smith 9339371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 934488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 935488007eeSBarry Smith 936488007eeSBarry Smith PetscFunctionBegin; 9379566063dSJacob Faibussowitsch PetscCall(MatAXPY(A->A, alpha, B->A, str)); 938488007eeSBarry Smith PetscFunctionReturn(0); 939488007eeSBarry Smith } 940488007eeSBarry Smith 9419371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIDense(Mat mat) { 942ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 943ba337c44SJed Brown 944ba337c44SJed Brown PetscFunctionBegin; 9459566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 946ba337c44SJed Brown PetscFunctionReturn(0); 947ba337c44SJed Brown } 948ba337c44SJed Brown 9499371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIDense(Mat A) { 950ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 951ba337c44SJed Brown 952ba337c44SJed Brown PetscFunctionBegin; 9539566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 954ba337c44SJed Brown PetscFunctionReturn(0); 955ba337c44SJed Brown } 956ba337c44SJed Brown 9579371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIDense(Mat A) { 958ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959ba337c44SJed Brown 960ba337c44SJed Brown PetscFunctionBegin; 9619566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 962ba337c44SJed Brown PetscFunctionReturn(0); 963ba337c44SJed Brown } 964ba337c44SJed Brown 9659371c9d4SSatish Balay static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) { 966637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 96749a6ff4bSBarry Smith 96849a6ff4bSBarry Smith PetscFunctionBegin; 96928b400f6SJacob Faibussowitsch PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 97028b400f6SJacob Faibussowitsch PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 9719566063dSJacob Faibussowitsch PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 97249a6ff4bSBarry Smith PetscFunctionReturn(0); 97349a6ff4bSBarry Smith } 97449a6ff4bSBarry Smith 975857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 97652c5f739Sprj- 9779371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) { 978a873a8cdSSam Reynolds PetscInt i, m, n; 9790716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 9800716a85fSBarry Smith PetscReal *work; 9810716a85fSBarry Smith 9820716a85fSBarry Smith PetscFunctionBegin; 9839566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 9849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &work)); 985857cbf51SRichard Tran Mills if (type == REDUCTION_MEAN_REALPART) { 9869566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 987857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 9889566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 989a873a8cdSSam Reynolds } else { 9909566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 991a873a8cdSSam Reynolds } 992857cbf51SRichard Tran Mills if (type == NORM_2) { 9930716a85fSBarry Smith for (i = 0; i < n; i++) work[i] *= work[i]; 9940716a85fSBarry Smith } 995857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 9961c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 9970716a85fSBarry Smith } else { 9981c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 9990716a85fSBarry Smith } 10009566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 1001857cbf51SRichard Tran Mills if (type == NORM_2) { 1002a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1003857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1004a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] /= m; 10050716a85fSBarry Smith } 10060716a85fSBarry Smith PetscFunctionReturn(0); 10070716a85fSBarry Smith } 10080716a85fSBarry Smith 1009637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 10109371c9d4SSatish Balay PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) { 1011126e4e93SJose E. Roman PetscScalar *da; 1012126e4e93SJose E. Roman PetscInt lda; 1013126e4e93SJose E. Roman 1014126e4e93SJose E. Roman PetscFunctionBegin; 1015126e4e93SJose E. Roman PetscCall(MatDenseCUDAGetArray(A, &da)); 1016126e4e93SJose E. Roman PetscCall(MatDenseGetLDA(A, &lda)); 1017126e4e93SJose E. Roman PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1018126e4e93SJose E. Roman PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1019126e4e93SJose E. Roman PetscCall(MatDenseCUDARestoreArray(A, &da)); 1020126e4e93SJose E. Roman PetscFunctionReturn(0); 1021126e4e93SJose E. Roman } 1022126e4e93SJose E. Roman 10239371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 10246947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10256947451fSStefano Zampini PetscInt lda; 10266947451fSStefano Zampini 10276947451fSStefano Zampini PetscFunctionBegin; 102828b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 102928b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 10306947451fSStefano Zampini if (!a->cvec) { 10319566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 10329566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 10336947451fSStefano Zampini } 10346947451fSStefano Zampini a->vecinuse = col + 1; 10359566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 10369566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 10379566063dSJacob Faibussowitsch PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 10386947451fSStefano Zampini *v = a->cvec; 10396947451fSStefano Zampini PetscFunctionReturn(0); 10406947451fSStefano Zampini } 10416947451fSStefano Zampini 10429371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 10436947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10446947451fSStefano Zampini 10456947451fSStefano Zampini PetscFunctionBegin; 104628b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 104728b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 10486947451fSStefano Zampini a->vecinuse = 0; 10499566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 10509566063dSJacob Faibussowitsch PetscCall(VecCUDAResetArray(a->cvec)); 1051742765d3SMatthew Knepley if (v) *v = NULL; 10526947451fSStefano Zampini PetscFunctionReturn(0); 10536947451fSStefano Zampini } 10546947451fSStefano Zampini 10559371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 10566947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10576947451fSStefano Zampini PetscInt lda; 10586947451fSStefano Zampini 10596947451fSStefano Zampini PetscFunctionBegin; 106028b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 106128b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 10626947451fSStefano Zampini if (!a->cvec) { 10639566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 10649566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 10656947451fSStefano Zampini } 10666947451fSStefano Zampini a->vecinuse = col + 1; 10679566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 10689566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse)); 10699566063dSJacob Faibussowitsch PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 10709566063dSJacob Faibussowitsch PetscCall(VecLockReadPush(a->cvec)); 10716947451fSStefano Zampini *v = a->cvec; 10726947451fSStefano Zampini PetscFunctionReturn(0); 10736947451fSStefano Zampini } 10746947451fSStefano Zampini 10759371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 10766947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10776947451fSStefano Zampini 10786947451fSStefano Zampini PetscFunctionBegin; 107928b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 108028b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 10816947451fSStefano Zampini a->vecinuse = 0; 10829566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse)); 10839566063dSJacob Faibussowitsch PetscCall(VecLockReadPop(a->cvec)); 10849566063dSJacob Faibussowitsch PetscCall(VecCUDAResetArray(a->cvec)); 1085742765d3SMatthew Knepley if (v) *v = NULL; 10866947451fSStefano Zampini PetscFunctionReturn(0); 10876947451fSStefano Zampini } 10886947451fSStefano Zampini 10899371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 10906947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10916947451fSStefano Zampini PetscInt lda; 10926947451fSStefano Zampini 10936947451fSStefano Zampini PetscFunctionBegin; 109428b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 109528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 10966947451fSStefano Zampini if (!a->cvec) { 10979566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 10989566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 10996947451fSStefano Zampini } 11006947451fSStefano Zampini a->vecinuse = col + 1; 11019566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 11029566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 11039566063dSJacob Faibussowitsch PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 11046947451fSStefano Zampini *v = a->cvec; 11056947451fSStefano Zampini PetscFunctionReturn(0); 11066947451fSStefano Zampini } 11076947451fSStefano Zampini 11089371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 11096947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 11106947451fSStefano Zampini 11116947451fSStefano Zampini PetscFunctionBegin; 111228b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 111328b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 11146947451fSStefano Zampini a->vecinuse = 0; 11159566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 11169566063dSJacob Faibussowitsch PetscCall(VecCUDAResetArray(a->cvec)); 1117742765d3SMatthew Knepley if (v) *v = NULL; 11186947451fSStefano Zampini PetscFunctionReturn(0); 11196947451fSStefano Zampini } 11206947451fSStefano Zampini 11219371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) { 1122637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1123637a0070SStefano Zampini 1124637a0070SStefano Zampini PetscFunctionBegin; 112528b400f6SJacob Faibussowitsch PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 112628b400f6SJacob Faibussowitsch PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 11279566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAPlaceArray(l->A, a)); 1128637a0070SStefano Zampini PetscFunctionReturn(0); 1129637a0070SStefano Zampini } 1130637a0070SStefano Zampini 11319371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) { 1132637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1133637a0070SStefano Zampini 1134637a0070SStefano Zampini PetscFunctionBegin; 113528b400f6SJacob Faibussowitsch PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 113628b400f6SJacob Faibussowitsch PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 11379566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAResetArray(l->A)); 1138637a0070SStefano Zampini PetscFunctionReturn(0); 1139637a0070SStefano Zampini } 1140637a0070SStefano Zampini 11419371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) { 1142d5ea218eSStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1143d5ea218eSStefano Zampini 1144d5ea218eSStefano Zampini PetscFunctionBegin; 114528b400f6SJacob Faibussowitsch PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 114628b400f6SJacob Faibussowitsch PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 11479566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAReplaceArray(l->A, a)); 1148d5ea218eSStefano Zampini PetscFunctionReturn(0); 1149d5ea218eSStefano Zampini } 1150d5ea218eSStefano Zampini 11519371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) { 1152637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1153637a0070SStefano Zampini 1154637a0070SStefano Zampini PetscFunctionBegin; 11559566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArrayWrite(l->A, a)); 1156637a0070SStefano Zampini PetscFunctionReturn(0); 1157637a0070SStefano Zampini } 1158637a0070SStefano Zampini 11599371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) { 1160637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1161637a0070SStefano Zampini 1162637a0070SStefano Zampini PetscFunctionBegin; 11639566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a)); 1164637a0070SStefano Zampini PetscFunctionReturn(0); 1165637a0070SStefano Zampini } 1166637a0070SStefano Zampini 11679371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1168637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1169637a0070SStefano Zampini 1170637a0070SStefano Zampini PetscFunctionBegin; 11719566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArrayRead(l->A, a)); 1172637a0070SStefano Zampini PetscFunctionReturn(0); 1173637a0070SStefano Zampini } 1174637a0070SStefano Zampini 11759371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1176637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1177637a0070SStefano Zampini 1178637a0070SStefano Zampini PetscFunctionBegin; 11799566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArrayRead(l->A, a)); 1180637a0070SStefano Zampini PetscFunctionReturn(0); 1181637a0070SStefano Zampini } 1182637a0070SStefano Zampini 11839371c9d4SSatish Balay static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1184637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1185637a0070SStefano Zampini 1186637a0070SStefano Zampini PetscFunctionBegin; 11879566063dSJacob Faibussowitsch PetscCall(MatDenseCUDAGetArray(l->A, a)); 1188637a0070SStefano Zampini PetscFunctionReturn(0); 1189637a0070SStefano Zampini } 1190637a0070SStefano Zampini 11919371c9d4SSatish Balay static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1192637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1193637a0070SStefano Zampini 1194637a0070SStefano Zampini PetscFunctionBegin; 11959566063dSJacob Faibussowitsch PetscCall(MatDenseCUDARestoreArray(l->A, a)); 1196637a0070SStefano Zampini PetscFunctionReturn(0); 1197637a0070SStefano Zampini } 1198637a0070SStefano Zampini 11996947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 12006947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 12016947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 12026947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 12036947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 12046947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 12055ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 12066947451fSStefano Zampini 12079371c9d4SSatish Balay static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) { 1208637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1209637a0070SStefano Zampini 1210637a0070SStefano Zampini PetscFunctionBegin; 121128b400f6SJacob Faibussowitsch PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 121228b400f6SJacob Faibussowitsch PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 12131baa6e33SBarry Smith if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1214637a0070SStefano Zampini mat->boundtocpu = bind; 12156947451fSStefano Zampini if (!bind) { 12166947451fSStefano Zampini PetscBool iscuda; 12176947451fSStefano Zampini 12189566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda)); 121948a46eb9SPierre Jolivet if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 12209566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda)); 122148a46eb9SPierre Jolivet if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 12229566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA)); 12239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA)); 12249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA)); 12259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 12269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 12279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1228126e4e93SJose E. Roman mat->ops->shift = MatShift_MPIDenseCUDA; 12296947451fSStefano Zampini } else { 12309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 12319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 12329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 12339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 12349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 12359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1236126e4e93SJose E. Roman mat->ops->shift = MatShift_MPIDense; 1237616b8fbbSStefano Zampini } 12381baa6e33SBarry Smith if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1239637a0070SStefano Zampini PetscFunctionReturn(0); 1240637a0070SStefano Zampini } 1241637a0070SStefano Zampini 12429371c9d4SSatish Balay PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) { 1243637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1244637a0070SStefano Zampini PetscBool iscuda; 1245637a0070SStefano Zampini 1246637a0070SStefano Zampini PetscFunctionBegin; 1247d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 12489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1249637a0070SStefano Zampini if (!iscuda) PetscFunctionReturn(0); 12509566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 12519566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 1252637a0070SStefano Zampini if (!d->A) { 12539566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 12549566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A)); 12559566063dSJacob Faibussowitsch PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1256637a0070SStefano Zampini } 12579566063dSJacob Faibussowitsch PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 12589566063dSJacob Faibussowitsch PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1259637a0070SStefano Zampini A->preallocated = PETSC_TRUE; 12606f08ad05SPierre Jolivet A->assembled = PETSC_TRUE; 1261637a0070SStefano Zampini PetscFunctionReturn(0); 1262637a0070SStefano Zampini } 1263637a0070SStefano Zampini #endif 1264637a0070SStefano Zampini 12659371c9d4SSatish Balay static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) { 126673a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense *)x->data; 126773a71a0fSBarry Smith 126873a71a0fSBarry Smith PetscFunctionBegin; 12699566063dSJacob Faibussowitsch PetscCall(MatSetRandom(d->A, rctx)); 127073a71a0fSBarry Smith PetscFunctionReturn(0); 127173a71a0fSBarry Smith } 127273a71a0fSBarry Smith 12739371c9d4SSatish Balay static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) { 12743b49f96aSBarry Smith PetscFunctionBegin; 12753b49f96aSBarry Smith *missing = PETSC_FALSE; 12763b49f96aSBarry Smith PetscFunctionReturn(0); 12773b49f96aSBarry Smith } 12783b49f96aSBarry Smith 12794222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1280cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12816718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1282a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12836718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 12846718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1285cc48ffa7SToby Isaac 12868965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 128709dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 128809dc0095SBarry Smith MatGetRow_MPIDense, 128909dc0095SBarry Smith MatRestoreRow_MPIDense, 129009dc0095SBarry Smith MatMult_MPIDense, 129197304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 12927c922b88SBarry Smith MatMultTranspose_MPIDense, 12937c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 1294f4259b30SLisandro Dalcin NULL, 1295f4259b30SLisandro Dalcin NULL, 1296f4259b30SLisandro Dalcin NULL, 1297f4259b30SLisandro Dalcin /* 10*/ NULL, 1298f4259b30SLisandro Dalcin NULL, 1299f4259b30SLisandro Dalcin NULL, 1300f4259b30SLisandro Dalcin NULL, 130109dc0095SBarry Smith MatTranspose_MPIDense, 130297304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 13036e4ee0c6SHong Zhang MatEqual_MPIDense, 130409dc0095SBarry Smith MatGetDiagonal_MPIDense, 13055b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 130609dc0095SBarry Smith MatNorm_MPIDense, 130797304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 130809dc0095SBarry Smith MatAssemblyEnd_MPIDense, 130909dc0095SBarry Smith MatSetOption_MPIDense, 131009dc0095SBarry Smith MatZeroEntries_MPIDense, 1311d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1312f4259b30SLisandro Dalcin NULL, 1313f4259b30SLisandro Dalcin NULL, 1314f4259b30SLisandro Dalcin NULL, 1315f4259b30SLisandro Dalcin NULL, 13164994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1317f4259b30SLisandro Dalcin NULL, 1318f4259b30SLisandro Dalcin NULL, 1319c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 1320f4259b30SLisandro Dalcin NULL, 1321d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 1322f4259b30SLisandro Dalcin NULL, 1323f4259b30SLisandro Dalcin NULL, 1324f4259b30SLisandro Dalcin NULL, 1325f4259b30SLisandro Dalcin NULL, 1326d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 13277dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 1328f4259b30SLisandro Dalcin NULL, 132909dc0095SBarry Smith MatGetValues_MPIDense, 1330a76f77c3SStefano Zampini MatCopy_MPIDense, 1331f4259b30SLisandro Dalcin /* 44*/ NULL, 133209dc0095SBarry Smith MatScale_MPIDense, 13332f605a99SJose E. Roman MatShift_MPIDense, 1334f4259b30SLisandro Dalcin NULL, 1335f4259b30SLisandro Dalcin NULL, 133673a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 1337f4259b30SLisandro Dalcin NULL, 1338f4259b30SLisandro Dalcin NULL, 1339f4259b30SLisandro Dalcin NULL, 1340f4259b30SLisandro Dalcin NULL, 1341f4259b30SLisandro Dalcin /* 54*/ NULL, 1342f4259b30SLisandro Dalcin NULL, 1343f4259b30SLisandro Dalcin NULL, 1344f4259b30SLisandro Dalcin NULL, 1345f4259b30SLisandro Dalcin NULL, 13467dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1347b9b97703SBarry Smith MatDestroy_MPIDense, 1348b9b97703SBarry Smith MatView_MPIDense, 1349f4259b30SLisandro Dalcin NULL, 1350f4259b30SLisandro Dalcin NULL, 1351f4259b30SLisandro Dalcin /* 64*/ NULL, 1352f4259b30SLisandro Dalcin NULL, 1353f4259b30SLisandro Dalcin NULL, 1354f4259b30SLisandro Dalcin NULL, 1355f4259b30SLisandro Dalcin NULL, 1356f4259b30SLisandro Dalcin /* 69*/ NULL, 1357f4259b30SLisandro Dalcin NULL, 1358f4259b30SLisandro Dalcin NULL, 1359f4259b30SLisandro Dalcin NULL, 1360f4259b30SLisandro Dalcin NULL, 1361f4259b30SLisandro Dalcin /* 74*/ NULL, 1362f4259b30SLisandro Dalcin NULL, 1363f4259b30SLisandro Dalcin NULL, 1364f4259b30SLisandro Dalcin NULL, 1365f4259b30SLisandro Dalcin NULL, 1366f4259b30SLisandro Dalcin /* 79*/ NULL, 1367f4259b30SLisandro Dalcin NULL, 1368f4259b30SLisandro Dalcin NULL, 1369f4259b30SLisandro Dalcin NULL, 13705bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1371f4259b30SLisandro Dalcin NULL, 1372f4259b30SLisandro Dalcin NULL, 1373f4259b30SLisandro Dalcin NULL, 1374f4259b30SLisandro Dalcin NULL, 1375f4259b30SLisandro Dalcin NULL, 1376f4259b30SLisandro Dalcin /* 89*/ NULL, 1377f4259b30SLisandro Dalcin NULL, 1378f4259b30SLisandro Dalcin NULL, 1379f4259b30SLisandro Dalcin NULL, 1380f4259b30SLisandro Dalcin NULL, 1381f4259b30SLisandro Dalcin /* 94*/ NULL, 1382f4259b30SLisandro Dalcin NULL, 13836718818eSStefano Zampini MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1384cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1385f4259b30SLisandro Dalcin NULL, 13864222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1387f4259b30SLisandro Dalcin NULL, 1388f4259b30SLisandro Dalcin NULL, 1389ba337c44SJed Brown MatConjugate_MPIDense, 1390f4259b30SLisandro Dalcin NULL, 1391f4259b30SLisandro Dalcin /*104*/ NULL, 1392ba337c44SJed Brown MatRealPart_MPIDense, 1393ba337c44SJed Brown MatImaginaryPart_MPIDense, 1394f4259b30SLisandro Dalcin NULL, 1395f4259b30SLisandro Dalcin NULL, 1396f4259b30SLisandro Dalcin /*109*/ NULL, 1397f4259b30SLisandro Dalcin NULL, 1398f4259b30SLisandro Dalcin NULL, 139949a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 14003b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 1401f4259b30SLisandro Dalcin /*114*/ NULL, 1402f4259b30SLisandro Dalcin NULL, 1403f4259b30SLisandro Dalcin NULL, 1404f4259b30SLisandro Dalcin NULL, 1405f4259b30SLisandro Dalcin NULL, 1406f4259b30SLisandro Dalcin /*119*/ NULL, 1407f4259b30SLisandro Dalcin NULL, 1408f4259b30SLisandro Dalcin NULL, 1409f4259b30SLisandro Dalcin NULL, 1410f4259b30SLisandro Dalcin NULL, 1411f4259b30SLisandro Dalcin /*124*/ NULL, 1412a873a8cdSSam Reynolds MatGetColumnReductions_MPIDense, 1413f4259b30SLisandro Dalcin NULL, 1414f4259b30SLisandro Dalcin NULL, 1415f4259b30SLisandro Dalcin NULL, 1416f4259b30SLisandro Dalcin /*129*/ NULL, 1417f4259b30SLisandro Dalcin NULL, 14186718818eSStefano Zampini MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1419cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 1420f4259b30SLisandro Dalcin NULL, 1421f4259b30SLisandro Dalcin /*134*/ NULL, 1422f4259b30SLisandro Dalcin NULL, 1423f4259b30SLisandro Dalcin NULL, 1424f4259b30SLisandro Dalcin NULL, 1425f4259b30SLisandro Dalcin NULL, 1426f4259b30SLisandro Dalcin /*139*/ NULL, 1427f4259b30SLisandro Dalcin NULL, 1428f4259b30SLisandro Dalcin NULL, 1429f4259b30SLisandro Dalcin NULL, 1430f4259b30SLisandro Dalcin NULL, 14314222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 1432f4259b30SLisandro Dalcin /*145*/ NULL, 1433f4259b30SLisandro Dalcin NULL, 143499a7f59eSMark Adams NULL, 143599a7f59eSMark Adams NULL, 14367fb60732SBarry Smith NULL, 14379371c9d4SSatish Balay /*150*/ NULL}; 14388965ea79SLois Curfman McInnes 14399371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) { 1440637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441637a0070SStefano Zampini PetscBool iscuda; 1442a23d5eceSKris Buschelman 1443a23d5eceSKris Buschelman PetscFunctionBegin; 144428b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 14459566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 14469566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 1447637a0070SStefano Zampini if (!a->A) { 14489566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 14499566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 14509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1451637a0070SStefano Zampini } 14529566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 14539566063dSJacob Faibussowitsch PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 14549566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1455637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 14566f08ad05SPierre Jolivet mat->assembled = PETSC_TRUE; 1457a23d5eceSKris Buschelman PetscFunctionReturn(0); 1458a23d5eceSKris Buschelman } 1459a23d5eceSKris Buschelman 14609371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 14614e29119aSPierre Jolivet Mat B, C; 14624e29119aSPierre Jolivet 14634e29119aSPierre Jolivet PetscFunctionBegin; 14649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 14659566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 14669566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 14674e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14684e29119aSPierre Jolivet C = *newmat; 14694e29119aSPierre Jolivet } else C = NULL; 14709566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14719566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14724e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14739566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14744e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14754e29119aSPierre Jolivet PetscFunctionReturn(0); 14764e29119aSPierre Jolivet } 14774e29119aSPierre Jolivet 14789371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 14794e29119aSPierre Jolivet Mat B, C; 14804e29119aSPierre Jolivet 14814e29119aSPierre Jolivet PetscFunctionBegin; 14829566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(A, &C)); 14839566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 14844e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14854e29119aSPierre Jolivet C = *newmat; 14864e29119aSPierre Jolivet } else C = NULL; 14879566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14894e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14909566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14914e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14924e29119aSPierre Jolivet PetscFunctionReturn(0); 14934e29119aSPierre Jolivet } 14944e29119aSPierre Jolivet 149565b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 14969371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 14978ea901baSHong Zhang Mat mat_elemental; 149832d7a744SHong Zhang PetscScalar *v; 149932d7a744SHong Zhang PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 15008ea901baSHong Zhang 15018baccfbdSHong Zhang PetscFunctionBegin; 1502378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1503378336b6SHong Zhang mat_elemental = *newmat; 15049566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(*newmat)); 1505378336b6SHong Zhang } else { 15069566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 15079566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 15089566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 15099566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_elemental)); 15109566063dSJacob Faibussowitsch PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1511378336b6SHong Zhang } 1512378336b6SHong Zhang 15139566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &rows, N, &cols)); 151432d7a744SHong Zhang for (i = 0; i < N; i++) cols[i] = i; 151532d7a744SHong Zhang for (i = 0; i < m; i++) rows[i] = rstart + i; 15168ea901baSHong Zhang 1517637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 15189566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(A, &v)); 15199566063dSJacob Faibussowitsch PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 15209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 15219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 15229566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &v)); 15239566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, cols)); 15248ea901baSHong Zhang 1525511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 15269566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &mat_elemental)); 15278ea901baSHong Zhang } else { 15288ea901baSHong Zhang *newmat = mat_elemental; 15298ea901baSHong Zhang } 15308baccfbdSHong Zhang PetscFunctionReturn(0); 15318baccfbdSHong Zhang } 153265b80a83SHong Zhang #endif 15338baccfbdSHong Zhang 15349371c9d4SSatish Balay static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) { 153586aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 153686aefd0dSHong Zhang 153786aefd0dSHong Zhang PetscFunctionBegin; 15389566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumn(mat->A, col, vals)); 153986aefd0dSHong Zhang PetscFunctionReturn(0); 154086aefd0dSHong Zhang } 154186aefd0dSHong Zhang 15429371c9d4SSatish Balay static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) { 154386aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 154486aefd0dSHong Zhang 154586aefd0dSHong Zhang PetscFunctionBegin; 15469566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumn(mat->A, vals)); 154786aefd0dSHong Zhang PetscFunctionReturn(0); 154886aefd0dSHong Zhang } 154986aefd0dSHong Zhang 15509371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 155194e2cb23SJakub Kruzik Mat_MPIDense *mat; 155294e2cb23SJakub Kruzik PetscInt m, nloc, N; 155394e2cb23SJakub Kruzik 155494e2cb23SJakub Kruzik PetscFunctionBegin; 15559566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 15569566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 155794e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 155894e2cb23SJakub Kruzik PetscInt sum; 155994e2cb23SJakub Kruzik 156048a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 156194e2cb23SJakub Kruzik /* Check sum(n) = N */ 15621c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 156308401ef6SPierre Jolivet PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 156494e2cb23SJakub Kruzik 15659566063dSJacob Faibussowitsch PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 15669566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 156794e2cb23SJakub Kruzik } 156894e2cb23SJakub Kruzik 156994e2cb23SJakub Kruzik /* numeric phase */ 157094e2cb23SJakub Kruzik mat = (Mat_MPIDense *)(*outmat)->data; 15719566063dSJacob Faibussowitsch PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 157294e2cb23SJakub Kruzik PetscFunctionReturn(0); 157394e2cb23SJakub Kruzik } 157494e2cb23SJakub Kruzik 1575637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 15769371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1577637a0070SStefano Zampini Mat B; 1578637a0070SStefano Zampini Mat_MPIDense *m; 1579637a0070SStefano Zampini 1580637a0070SStefano Zampini PetscFunctionBegin; 1581637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 15829566063dSJacob Faibussowitsch PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1583637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 15849566063dSJacob Faibussowitsch PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1585637a0070SStefano Zampini } 1586637a0070SStefano Zampini 1587637a0070SStefano Zampini B = *newmat; 15889566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 15899566063dSJacob Faibussowitsch PetscCall(PetscFree(B->defaultvectype)); 15909566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 15919566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 15929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 15939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 15949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 15959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 15969566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 15979566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 15989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 15999566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 16009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 16019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 16029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 16039566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 16049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 16059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 1606637a0070SStefano Zampini m = (Mat_MPIDense *)(B)->data; 160748a46eb9SPierre Jolivet if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 1608637a0070SStefano Zampini B->ops->bindtocpu = NULL; 1609637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_CPU; 1610637a0070SStefano Zampini PetscFunctionReturn(0); 1611637a0070SStefano Zampini } 1612637a0070SStefano Zampini 16139371c9d4SSatish Balay PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1614637a0070SStefano Zampini Mat B; 1615637a0070SStefano Zampini Mat_MPIDense *m; 1616637a0070SStefano Zampini 1617637a0070SStefano Zampini PetscFunctionBegin; 1618637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 16199566063dSJacob Faibussowitsch PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1620637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 16219566063dSJacob Faibussowitsch PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1622637a0070SStefano Zampini } 1623637a0070SStefano Zampini 1624637a0070SStefano Zampini B = *newmat; 16259566063dSJacob Faibussowitsch PetscCall(PetscFree(B->defaultvectype)); 16269566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 16279566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 16289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 16299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 16309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 16319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 16329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 16339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 16349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 16359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 16369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 16379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 16389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 16399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 16409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 16419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1642f5ff9c66SStefano Zampini m = (Mat_MPIDense *)(B->data); 1643637a0070SStefano Zampini if (m->A) { 16449566063dSJacob Faibussowitsch PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 1645637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_BOTH; 1646637a0070SStefano Zampini } else { 1647637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1648637a0070SStefano Zampini } 16499566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 1650637a0070SStefano Zampini 1651637a0070SStefano Zampini B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1652637a0070SStefano Zampini PetscFunctionReturn(0); 1653637a0070SStefano Zampini } 1654637a0070SStefano Zampini #endif 1655637a0070SStefano Zampini 16569371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) { 16576947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 16586947451fSStefano Zampini PetscInt lda; 16596947451fSStefano Zampini 16606947451fSStefano Zampini PetscFunctionBegin; 166128b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 166228b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 166348a46eb9SPierre Jolivet if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 16646947451fSStefano Zampini a->vecinuse = col + 1; 16659566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 16669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 16679566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 16686947451fSStefano Zampini *v = a->cvec; 16696947451fSStefano Zampini PetscFunctionReturn(0); 16706947451fSStefano Zampini } 16716947451fSStefano Zampini 16729371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) { 16736947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 16746947451fSStefano Zampini 16756947451fSStefano Zampini PetscFunctionBegin; 167628b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 167728b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 16786947451fSStefano Zampini a->vecinuse = 0; 16799566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 16809566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1681742765d3SMatthew Knepley if (v) *v = NULL; 16826947451fSStefano Zampini PetscFunctionReturn(0); 16836947451fSStefano Zampini } 16846947451fSStefano Zampini 16859371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) { 16866947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 16876947451fSStefano Zampini PetscInt lda; 16886947451fSStefano Zampini 16896947451fSStefano Zampini PetscFunctionBegin; 169028b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 169128b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 169248a46eb9SPierre Jolivet if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 16936947451fSStefano Zampini a->vecinuse = col + 1; 16949566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 16959566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 16969566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 16979566063dSJacob Faibussowitsch PetscCall(VecLockReadPush(a->cvec)); 16986947451fSStefano Zampini *v = a->cvec; 16996947451fSStefano Zampini PetscFunctionReturn(0); 17006947451fSStefano Zampini } 17016947451fSStefano Zampini 17029371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) { 17036947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 17046947451fSStefano Zampini 17056947451fSStefano Zampini PetscFunctionBegin; 17065f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 17075f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 17086947451fSStefano Zampini a->vecinuse = 0; 17099566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 17109566063dSJacob Faibussowitsch PetscCall(VecLockReadPop(a->cvec)); 17119566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1712742765d3SMatthew Knepley if (v) *v = NULL; 17136947451fSStefano Zampini PetscFunctionReturn(0); 17146947451fSStefano Zampini } 17156947451fSStefano Zampini 17169371c9d4SSatish Balay PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) { 17176947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 17186947451fSStefano Zampini PetscInt lda; 17196947451fSStefano Zampini 17206947451fSStefano Zampini PetscFunctionBegin; 17215f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 17225f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 172348a46eb9SPierre Jolivet if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 17246947451fSStefano Zampini a->vecinuse = col + 1; 17259566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 17269566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 17279566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 17286947451fSStefano Zampini *v = a->cvec; 17296947451fSStefano Zampini PetscFunctionReturn(0); 17306947451fSStefano Zampini } 17316947451fSStefano Zampini 17329371c9d4SSatish Balay PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) { 17336947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 17346947451fSStefano Zampini 17356947451fSStefano Zampini PetscFunctionBegin; 17365f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 17375f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 17386947451fSStefano Zampini a->vecinuse = 0; 17399566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 17409566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1741742765d3SMatthew Knepley if (v) *v = NULL; 17426947451fSStefano Zampini PetscFunctionReturn(0); 17436947451fSStefano Zampini } 17446947451fSStefano Zampini 17459371c9d4SSatish Balay PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 17465ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1747616b8fbbSStefano Zampini Mat_MPIDense *c; 1748616b8fbbSStefano Zampini MPI_Comm comm; 1749a2748737SPierre Jolivet PetscInt pbegin, pend; 17505ea7661aSPierre Jolivet 17515ea7661aSPierre Jolivet PetscFunctionBegin; 17529566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 17535f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 17545f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1755a2748737SPierre Jolivet pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1756a2748737SPierre Jolivet pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 17575ea7661aSPierre Jolivet if (!a->cmat) { 17589566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &a->cmat)); 17599566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat)); 17609566063dSJacob Faibussowitsch PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1761a2748737SPierre Jolivet if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1762a2748737SPierre Jolivet else { 1763a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1764a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1765a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1766a2748737SPierre Jolivet } 17679566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 17689566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1769a2748737SPierre Jolivet } else { 1770a2748737SPierre Jolivet PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1771a2748737SPierre Jolivet if (same && a->cmat->rmap->N != A->rmap->N) { 1772a2748737SPierre Jolivet same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1773a2748737SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1774a2748737SPierre Jolivet } 1775a2748737SPierre Jolivet if (!same) { 1776a2748737SPierre Jolivet PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1777a2748737SPierre Jolivet PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1778a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1779a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1780a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1781a2748737SPierre Jolivet } 1782a2748737SPierre Jolivet if (cend - cbegin != a->cmat->cmap->N) { 17839566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 17849566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 17859566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 17869566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 17875ea7661aSPierre Jolivet } 1788a2748737SPierre Jolivet } 1789616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 17905f80ce2aSJacob Faibussowitsch PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1791a2748737SPierre Jolivet PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1792616b8fbbSStefano Zampini a->cmat->preallocated = PETSC_TRUE; 1793616b8fbbSStefano Zampini a->cmat->assembled = PETSC_TRUE; 17945ea7661aSPierre Jolivet a->matinuse = cbegin + 1; 17955ea7661aSPierre Jolivet *v = a->cmat; 17965ea7661aSPierre Jolivet PetscFunctionReturn(0); 17975ea7661aSPierre Jolivet } 17985ea7661aSPierre Jolivet 17999371c9d4SSatish Balay PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) { 18005ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1801616b8fbbSStefano Zampini Mat_MPIDense *c; 18025ea7661aSPierre Jolivet 18035ea7661aSPierre Jolivet PetscFunctionBegin; 18045f80ce2aSJacob Faibussowitsch PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 18055f80ce2aSJacob Faibussowitsch PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 18065f80ce2aSJacob Faibussowitsch PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 18075ea7661aSPierre Jolivet a->matinuse = 0; 1808616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 18099566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1810742765d3SMatthew Knepley if (v) *v = NULL; 18115ea7661aSPierre Jolivet PetscFunctionReturn(0); 18125ea7661aSPierre Jolivet } 18135ea7661aSPierre Jolivet 1814002bc6daSRichard Tran Mills /*MC 1815002bc6daSRichard Tran Mills MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1816002bc6daSRichard Tran Mills 1817002bc6daSRichard Tran Mills Options Database Keys: 1818*11a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1819002bc6daSRichard Tran Mills 1820002bc6daSRichard Tran Mills Level: beginner 1821002bc6daSRichard Tran Mills 1822*11a5261eSBarry Smith .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1823002bc6daSRichard Tran Mills M*/ 18249371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) { 1825273d9f13SBarry Smith Mat_MPIDense *a; 1826273d9f13SBarry Smith 1827273d9f13SBarry Smith PetscFunctionBegin; 18289566063dSJacob Faibussowitsch PetscCall(PetscNewLog(mat, &a)); 1829b0a32e0cSBarry Smith mat->data = (void *)a; 18309566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 1831273d9f13SBarry Smith 1832273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1833273d9f13SBarry Smith 1834273d9f13SBarry Smith /* build cache for off array entries formed */ 1835273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 18362205254eSKarl Rupp 18379566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1838273d9f13SBarry Smith 1839273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1840f4259b30SLisandro Dalcin a->lvec = NULL; 1841f4259b30SLisandro Dalcin a->Mvctx = NULL; 1842273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1843273d9f13SBarry Smith 18449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 18459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 18469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 18479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 18489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 18499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 18509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 18519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 18529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 18539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 18549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 18559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 18569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 18579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 18589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 18599566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 18609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 18619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 18629566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 18639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 18649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 18658baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 18669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 18678baccfbdSHong Zhang #endif 1868d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 18699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1870d24d4204SJose E. Roman #endif 1871637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 18729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1873637a0070SStefano Zampini #endif 18749566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 18759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 18769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 18776718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 18789566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 18799566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 18806718818eSStefano Zampini #endif 18818949adfdSHong Zhang 18829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 18839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 18849566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1885273d9f13SBarry Smith PetscFunctionReturn(0); 1886273d9f13SBarry Smith } 1887273d9f13SBarry Smith 1888209238afSKris Buschelman /*MC 1889637a0070SStefano Zampini MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1890637a0070SStefano Zampini 1891637a0070SStefano Zampini Options Database Keys: 1892*11a5261eSBarry Smith . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 1893637a0070SStefano Zampini 1894637a0070SStefano Zampini Level: beginner 1895637a0070SStefano Zampini 1896*11a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA` 1897637a0070SStefano Zampini M*/ 1898637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1899a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h> 19009371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) { 1901637a0070SStefano Zampini PetscFunctionBegin; 19029566063dSJacob Faibussowitsch PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 19039566063dSJacob Faibussowitsch PetscCall(MatCreate_MPIDense(B)); 19049566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 1905637a0070SStefano Zampini PetscFunctionReturn(0); 1906637a0070SStefano Zampini } 1907637a0070SStefano Zampini #endif 1908637a0070SStefano Zampini 1909637a0070SStefano Zampini /*MC 1910002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1911209238afSKris Buschelman 1912*11a5261eSBarry Smith This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1913*11a5261eSBarry Smith and `MATMPIDENSE` otherwise. 1914209238afSKris Buschelman 1915209238afSKris Buschelman Options Database Keys: 1916*11a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1917209238afSKris Buschelman 1918209238afSKris Buschelman Level: beginner 1919209238afSKris Buschelman 1920c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 19216947451fSStefano Zampini M*/ 19226947451fSStefano Zampini 19236947451fSStefano Zampini /*MC 19246947451fSStefano Zampini MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 19256947451fSStefano Zampini 1926*11a5261eSBarry Smith This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 1927*11a5261eSBarry Smith and `MATMPIDENSECUDA` otherwise. 19286947451fSStefano Zampini 19296947451fSStefano Zampini Options Database Keys: 1930*11a5261eSBarry Smith . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 19316947451fSStefano Zampini 19326947451fSStefano Zampini Level: beginner 19336947451fSStefano Zampini 1934c2e3fba1SPatrick Sanan .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 1935209238afSKris Buschelman M*/ 1936209238afSKris Buschelman 1937273d9f13SBarry Smith /*@C 1938273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1939273d9f13SBarry Smith 19408f490ea3SStefano Zampini Collective 1941273d9f13SBarry Smith 1942273d9f13SBarry Smith Input Parameters: 19431c4f3114SJed Brown . B - the matrix 19440298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1945273d9f13SBarry Smith to control all matrix memory allocation. 1946273d9f13SBarry Smith 1947273d9f13SBarry Smith Notes: 1948273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1949273d9f13SBarry Smith storage by columns. 1950273d9f13SBarry Smith 1951273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1952273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 19530298fd71SBarry Smith set data=NULL. 1954273d9f13SBarry Smith 1955273d9f13SBarry Smith Level: intermediate 1956273d9f13SBarry Smith 1957*11a5261eSBarry Smith .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1958273d9f13SBarry Smith @*/ 19599371c9d4SSatish Balay PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) { 1960273d9f13SBarry Smith PetscFunctionBegin; 1961d5ea218eSStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1962cac4c232SBarry Smith PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1963273d9f13SBarry Smith PetscFunctionReturn(0); 1964273d9f13SBarry Smith } 1965273d9f13SBarry Smith 1966d3042a70SBarry Smith /*@ 1967*11a5261eSBarry Smith MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1968d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1969d3042a70SBarry Smith into a matrix 1970d3042a70SBarry Smith 1971d3042a70SBarry Smith Not Collective 1972d3042a70SBarry Smith 1973d3042a70SBarry Smith Input Parameters: 1974d3042a70SBarry Smith + mat - the matrix 1975d3042a70SBarry Smith - array - the array in column major order 1976d3042a70SBarry Smith 1977*11a5261eSBarry Smith Note: 1978*11a5261eSBarry Smith You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1979d3042a70SBarry Smith freed when the matrix is destroyed. 1980d3042a70SBarry Smith 1981d3042a70SBarry Smith Level: developer 1982d3042a70SBarry Smith 1983*11a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1984*11a5261eSBarry Smith `MatDenseReplaceArray()` 1985d3042a70SBarry Smith @*/ 19869371c9d4SSatish Balay PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) { 1987d3042a70SBarry Smith PetscFunctionBegin; 1988d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1989cac4c232SBarry Smith PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 19909566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1991637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1992637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1993637a0070SStefano Zampini #endif 1994d3042a70SBarry Smith PetscFunctionReturn(0); 1995d3042a70SBarry Smith } 1996d3042a70SBarry Smith 1997d3042a70SBarry Smith /*@ 1998*11a5261eSBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1999d3042a70SBarry Smith 2000d3042a70SBarry Smith Not Collective 2001d3042a70SBarry Smith 2002d3042a70SBarry Smith Input Parameters: 2003d3042a70SBarry Smith . mat - the matrix 2004d3042a70SBarry Smith 2005*11a5261eSBarry Smith Note: 2006*11a5261eSBarry Smith You can only call this after a call to `MatDensePlaceArray()` 2007d3042a70SBarry Smith 2008d3042a70SBarry Smith Level: developer 2009d3042a70SBarry Smith 2010*11a5261eSBarry Smith .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2011d3042a70SBarry Smith @*/ 20129371c9d4SSatish Balay PetscErrorCode MatDenseResetArray(Mat mat) { 2013d3042a70SBarry Smith PetscFunctionBegin; 2014d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2015cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 20169566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2017d3042a70SBarry Smith PetscFunctionReturn(0); 2018d3042a70SBarry Smith } 2019d3042a70SBarry Smith 2020d5ea218eSStefano Zampini /*@ 2021d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2022d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2023d5ea218eSStefano Zampini into a matrix 2024d5ea218eSStefano Zampini 2025d5ea218eSStefano Zampini Not Collective 2026d5ea218eSStefano Zampini 2027d5ea218eSStefano Zampini Input Parameters: 2028d5ea218eSStefano Zampini + mat - the matrix 2029d5ea218eSStefano Zampini - array - the array in column major order 2030d5ea218eSStefano Zampini 2031*11a5261eSBarry Smith Note: 2032*11a5261eSBarry Smith The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 2033d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 2034d5ea218eSStefano Zampini 2035d5ea218eSStefano Zampini Level: developer 2036d5ea218eSStefano Zampini 2037*11a5261eSBarry Smith .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 2038d5ea218eSStefano Zampini @*/ 20399371c9d4SSatish Balay PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) { 2040d5ea218eSStefano Zampini PetscFunctionBegin; 2041d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2042cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 20439566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2044d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 2045d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 2046d5ea218eSStefano Zampini #endif 2047d5ea218eSStefano Zampini PetscFunctionReturn(0); 2048d5ea218eSStefano Zampini } 2049d5ea218eSStefano Zampini 2050637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 20518965ea79SLois Curfman McInnes /*@C 2052*11a5261eSBarry Smith MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2053637a0070SStefano Zampini array provided by the user. This is useful to avoid copying an array 2054637a0070SStefano Zampini into a matrix 2055637a0070SStefano Zampini 2056637a0070SStefano Zampini Not Collective 2057637a0070SStefano Zampini 2058637a0070SStefano Zampini Input Parameters: 2059637a0070SStefano Zampini + mat - the matrix 2060637a0070SStefano Zampini - array - the array in column major order 2061637a0070SStefano Zampini 2062*11a5261eSBarry Smith Note: 2063*11a5261eSBarry Smith You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be 2064637a0070SStefano Zampini freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2065637a0070SStefano Zampini 2066637a0070SStefano Zampini Level: developer 2067637a0070SStefano Zampini 2068*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()` 2069637a0070SStefano Zampini @*/ 20709371c9d4SSatish Balay PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) { 2071637a0070SStefano Zampini PetscFunctionBegin; 2072d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2073cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 20749566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2075637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2076637a0070SStefano Zampini PetscFunctionReturn(0); 2077637a0070SStefano Zampini } 2078637a0070SStefano Zampini 2079637a0070SStefano Zampini /*@C 2080*11a5261eSBarry Smith MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()` 2081637a0070SStefano Zampini 2082637a0070SStefano Zampini Not Collective 2083637a0070SStefano Zampini 2084637a0070SStefano Zampini Input Parameters: 2085637a0070SStefano Zampini . mat - the matrix 2086637a0070SStefano Zampini 2087*11a5261eSBarry Smith Note: 2088*11a5261eSBarry Smith You can only call this after a call to `MatDenseCUDAPlaceArray()` 2089637a0070SStefano Zampini 2090637a0070SStefano Zampini Level: developer 2091637a0070SStefano Zampini 2092*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2093637a0070SStefano Zampini @*/ 20949371c9d4SSatish Balay PetscErrorCode MatDenseCUDAResetArray(Mat mat) { 2095637a0070SStefano Zampini PetscFunctionBegin; 2096d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2097cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat)); 20989566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2099637a0070SStefano Zampini PetscFunctionReturn(0); 2100637a0070SStefano Zampini } 2101637a0070SStefano Zampini 2102637a0070SStefano Zampini /*@C 2103*11a5261eSBarry Smith MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 2104d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2105d5ea218eSStefano Zampini into a matrix 2106d5ea218eSStefano Zampini 2107d5ea218eSStefano Zampini Not Collective 2108d5ea218eSStefano Zampini 2109d5ea218eSStefano Zampini Input Parameters: 2110d5ea218eSStefano Zampini + mat - the matrix 2111d5ea218eSStefano Zampini - array - the array in column major order 2112d5ea218eSStefano Zampini 2113*11a5261eSBarry Smith Note: 2114d5ea218eSStefano Zampini This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2115d5ea218eSStefano Zampini The memory passed in CANNOT be freed by the user. It will be freed 2116d5ea218eSStefano Zampini when the matrix is destroyed. The array should respect the matrix leading dimension. 2117d5ea218eSStefano Zampini 2118d5ea218eSStefano Zampini Level: developer 2119d5ea218eSStefano Zampini 2120db781477SPatrick Sanan .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2121d5ea218eSStefano Zampini @*/ 21229371c9d4SSatish Balay PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) { 2123d5ea218eSStefano Zampini PetscFunctionBegin; 2124d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2125cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 21269566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2127d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2128d5ea218eSStefano Zampini PetscFunctionReturn(0); 2129d5ea218eSStefano Zampini } 2130d5ea218eSStefano Zampini 2131d5ea218eSStefano Zampini /*@C 2132*11a5261eSBarry Smith MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix. 2133637a0070SStefano Zampini 2134637a0070SStefano Zampini Not Collective 2135637a0070SStefano Zampini 2136637a0070SStefano Zampini Input Parameters: 2137637a0070SStefano Zampini . A - the matrix 2138637a0070SStefano Zampini 2139637a0070SStefano Zampini Output Parameters 2140637a0070SStefano Zampini . array - the GPU array in column major order 2141637a0070SStefano Zampini 2142637a0070SStefano Zampini Notes: 2143*11a5261eSBarry Smith The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2144*11a5261eSBarry Smith 2145*11a5261eSBarry Smith The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2146637a0070SStefano Zampini 2147637a0070SStefano Zampini Level: developer 2148637a0070SStefano Zampini 2149*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2150637a0070SStefano Zampini @*/ 21519371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) { 2152637a0070SStefano Zampini PetscFunctionBegin; 2153d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2154cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2156637a0070SStefano Zampini PetscFunctionReturn(0); 2157637a0070SStefano Zampini } 2158637a0070SStefano Zampini 2159637a0070SStefano Zampini /*@C 2160*11a5261eSBarry Smith MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2161637a0070SStefano Zampini 2162637a0070SStefano Zampini Not Collective 2163637a0070SStefano Zampini 2164637a0070SStefano Zampini Input Parameters: 2165637a0070SStefano Zampini + A - the matrix 2166637a0070SStefano Zampini - array - the GPU array in column major order 2167637a0070SStefano Zampini 2168637a0070SStefano Zampini Level: developer 2169637a0070SStefano Zampini 2170*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2171637a0070SStefano Zampini @*/ 21729371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) { 2173637a0070SStefano Zampini PetscFunctionBegin; 2174d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2175cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 21769566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2177637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2178637a0070SStefano Zampini PetscFunctionReturn(0); 2179637a0070SStefano Zampini } 2180637a0070SStefano Zampini 2181637a0070SStefano Zampini /*@C 2182*11a5261eSBarry Smith MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2183637a0070SStefano Zampini 2184637a0070SStefano Zampini Not Collective 2185637a0070SStefano Zampini 2186637a0070SStefano Zampini Input Parameters: 2187637a0070SStefano Zampini . A - the matrix 2188637a0070SStefano Zampini 2189637a0070SStefano Zampini Output Parameters 2190637a0070SStefano Zampini . array - the GPU array in column major order 2191637a0070SStefano Zampini 2192*11a5261eSBarry Smith Note: 2193*11a5261eSBarry Smith Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2194637a0070SStefano Zampini 2195637a0070SStefano Zampini Level: developer 2196637a0070SStefano Zampini 2197*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2198637a0070SStefano Zampini @*/ 21999371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) { 2200637a0070SStefano Zampini PetscFunctionBegin; 2201d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2202cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2203637a0070SStefano Zampini PetscFunctionReturn(0); 2204637a0070SStefano Zampini } 2205637a0070SStefano Zampini 2206637a0070SStefano Zampini /*@C 2207*11a5261eSBarry Smith MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2208637a0070SStefano Zampini 2209637a0070SStefano Zampini Not Collective 2210637a0070SStefano Zampini 2211637a0070SStefano Zampini Input Parameters: 2212637a0070SStefano Zampini + A - the matrix 2213637a0070SStefano Zampini - array - the GPU array in column major order 2214637a0070SStefano Zampini 2215*11a5261eSBarry Smith Note: 2216*11a5261eSBarry Smith Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2217637a0070SStefano Zampini 2218637a0070SStefano Zampini Level: developer 2219637a0070SStefano Zampini 2220*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2221637a0070SStefano Zampini @*/ 22229371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) { 2223637a0070SStefano Zampini PetscFunctionBegin; 2224cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2225637a0070SStefano Zampini PetscFunctionReturn(0); 2226637a0070SStefano Zampini } 2227637a0070SStefano Zampini 2228637a0070SStefano Zampini /*@C 2229*11a5261eSBarry Smith MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2230637a0070SStefano Zampini 2231637a0070SStefano Zampini Not Collective 2232637a0070SStefano Zampini 2233637a0070SStefano Zampini Input Parameters: 2234637a0070SStefano Zampini . A - the matrix 2235637a0070SStefano Zampini 2236637a0070SStefano Zampini Output Parameters 2237637a0070SStefano Zampini . array - the GPU array in column major order 2238637a0070SStefano Zampini 2239*11a5261eSBarry Smith Note: 2240*11a5261eSBarry Smith Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 2241637a0070SStefano Zampini 2242637a0070SStefano Zampini Level: developer 2243637a0070SStefano Zampini 2244*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2245637a0070SStefano Zampini @*/ 22469371c9d4SSatish Balay PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) { 2247637a0070SStefano Zampini PetscFunctionBegin; 2248d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2249cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 22509566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2251637a0070SStefano Zampini PetscFunctionReturn(0); 2252637a0070SStefano Zampini } 2253637a0070SStefano Zampini 2254637a0070SStefano Zampini /*@C 2255*11a5261eSBarry Smith MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 2256637a0070SStefano Zampini 2257637a0070SStefano Zampini Not Collective 2258637a0070SStefano Zampini 2259637a0070SStefano Zampini Input Parameters: 2260637a0070SStefano Zampini + A - the matrix 2261637a0070SStefano Zampini - array - the GPU array in column major order 2262637a0070SStefano Zampini 2263637a0070SStefano Zampini Level: developer 2264637a0070SStefano Zampini 2265*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2266637a0070SStefano Zampini @*/ 22679371c9d4SSatish Balay PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) { 2268637a0070SStefano Zampini PetscFunctionBegin; 2269d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2270cac4c232SBarry Smith PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 22719566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2272637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2273637a0070SStefano Zampini PetscFunctionReturn(0); 2274637a0070SStefano Zampini } 2275637a0070SStefano Zampini #endif 2276637a0070SStefano Zampini 2277637a0070SStefano Zampini /*@C 2278*11a5261eSBarry Smith MatCreateDense - Creates a matrix in `MATDENSE` format. 22798965ea79SLois Curfman McInnes 2280d083f849SBarry Smith Collective 2281db81eaa0SLois Curfman McInnes 22828965ea79SLois Curfman McInnes Input Parameters: 2283db81eaa0SLois Curfman McInnes + comm - MPI communicator 2284*11a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2285*11a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2286*11a5261eSBarry Smith . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2287*11a5261eSBarry Smith . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2288*11a5261eSBarry Smith - data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 2289dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 22908965ea79SLois Curfman McInnes 22918965ea79SLois Curfman McInnes Output Parameter: 2292477f1c0bSLois Curfman McInnes . A - the matrix 22938965ea79SLois Curfman McInnes 2294b259b22eSLois Curfman McInnes Notes: 229539ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 229639ddd567SLois Curfman McInnes storage by columns. 2297*11a5261eSBarry Smith 2298*11a5261eSBarry Smith Although local portions of the matrix are stored in column-major 2299002bc6daSRichard Tran Mills order, the matrix is partitioned across MPI ranks by row. 23008965ea79SLois Curfman McInnes 230118f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 230218f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 2303*11a5261eSBarry Smith set data=NULL (`PETSC_NULL_SCALAR` for Fortran users). 230418f449edSLois Curfman McInnes 23058965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 23068965ea79SLois Curfman McInnes (possibly both). 23078965ea79SLois Curfman McInnes 2308027ccd11SLois Curfman McInnes Level: intermediate 2309027ccd11SLois Curfman McInnes 2310*11a5261eSBarry Smith .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 23118965ea79SLois Curfman McInnes @*/ 23129371c9d4SSatish Balay PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 23133a40ed3dSBarry Smith PetscFunctionBegin; 23149566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 23159566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 23166f08ad05SPierre Jolivet PetscCall(MatSetType(*A, MATDENSE)); 23179566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(*A, data)); 23186f08ad05SPierre Jolivet PetscCall(MatMPIDenseSetPreallocation(*A, data)); 23193a40ed3dSBarry Smith PetscFunctionReturn(0); 23208965ea79SLois Curfman McInnes } 23218965ea79SLois Curfman McInnes 2322637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2323637a0070SStefano Zampini /*@C 2324*11a5261eSBarry Smith MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 2325637a0070SStefano Zampini 2326637a0070SStefano Zampini Collective 2327637a0070SStefano Zampini 2328637a0070SStefano Zampini Input Parameters: 2329637a0070SStefano Zampini + comm - MPI communicator 2330*11a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2331*11a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2332*11a5261eSBarry Smith . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2333*11a5261eSBarry Smith . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2334637a0070SStefano Zampini - data - optional location of GPU matrix data. Set data=NULL for PETSc 2335637a0070SStefano Zampini to control matrix memory allocation. 2336637a0070SStefano Zampini 2337637a0070SStefano Zampini Output Parameter: 2338637a0070SStefano Zampini . A - the matrix 2339637a0070SStefano Zampini 2340637a0070SStefano Zampini Level: intermediate 2341637a0070SStefano Zampini 2342*11a5261eSBarry Smith .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 2343637a0070SStefano Zampini @*/ 23449371c9d4SSatish Balay PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2345637a0070SStefano Zampini PetscFunctionBegin; 23469566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 2347637a0070SStefano Zampini PetscValidLogicalCollectiveBool(*A, !!data, 6); 23489566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 23496f08ad05SPierre Jolivet PetscCall(MatSetType(*A, MATDENSECUDA)); 23509566063dSJacob Faibussowitsch PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 23516f08ad05SPierre Jolivet PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 2352637a0070SStefano Zampini PetscFunctionReturn(0); 2353637a0070SStefano Zampini } 2354637a0070SStefano Zampini #endif 2355637a0070SStefano Zampini 23569371c9d4SSatish Balay static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 23578965ea79SLois Curfman McInnes Mat mat; 23583501a2bdSLois Curfman McInnes Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 23598965ea79SLois Curfman McInnes 23603a40ed3dSBarry Smith PetscFunctionBegin; 2361f4259b30SLisandro Dalcin *newmat = NULL; 23629566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 23639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 23649566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 2365834f8fabSBarry Smith a = (Mat_MPIDense *)mat->data; 23665aa7edbeSHong Zhang 2367d5f3da31SBarry Smith mat->factortype = A->factortype; 2368c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2369273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 23708965ea79SLois Curfman McInnes 2371e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 23723782ba37SSatish Balay a->donotstash = oldmat->donotstash; 2373e04c1aa4SHong Zhang 23749566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 23759566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 23768965ea79SLois Curfman McInnes 23779566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 23789566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 237901b82886SBarry Smith 23808965ea79SLois Curfman McInnes *newmat = mat; 23813a40ed3dSBarry Smith PetscFunctionReturn(0); 23828965ea79SLois Curfman McInnes } 23838965ea79SLois Curfman McInnes 23849371c9d4SSatish Balay PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) { 238587d5ce66SSatish Balay PetscBool isbinary; 238687d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 238787d5ce66SSatish Balay PetscBool ishdf5; 238887d5ce66SSatish Balay #endif 2389eb91f321SVaclav Hapla 2390eb91f321SVaclav Hapla PetscFunctionBegin; 2391eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2392eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2393eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 23949566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 23959566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 239687d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 23979566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 239887d5ce66SSatish Balay #endif 2399eb91f321SVaclav Hapla if (isbinary) { 24009566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2401eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 240287d5ce66SSatish Balay } else if (ishdf5) { 24039566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2404eb91f321SVaclav Hapla #endif 240598921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2406eb91f321SVaclav Hapla PetscFunctionReturn(0); 2407eb91f321SVaclav Hapla } 2408eb91f321SVaclav Hapla 24099371c9d4SSatish Balay static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) { 24106e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 24116e4ee0c6SHong Zhang Mat a, b; 241290ace30eSBarry Smith 24136e4ee0c6SHong Zhang PetscFunctionBegin; 24146e4ee0c6SHong Zhang a = matA->A; 24156e4ee0c6SHong Zhang b = matB->A; 24162cf15c64SPierre Jolivet PetscCall(MatEqual(a, b, flag)); 24172cf15c64SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 24186e4ee0c6SHong Zhang PetscFunctionReturn(0); 24196e4ee0c6SHong Zhang } 242090ace30eSBarry Smith 24219371c9d4SSatish Balay PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) { 24226718818eSStefano Zampini Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2423baa3c1c6SHong Zhang 2424baa3c1c6SHong Zhang PetscFunctionBegin; 24259566063dSJacob Faibussowitsch PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 24269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&atb->atb)); 24279566063dSJacob Faibussowitsch PetscCall(PetscFree(atb)); 2428baa3c1c6SHong Zhang PetscFunctionReturn(0); 2429baa3c1c6SHong Zhang } 2430baa3c1c6SHong Zhang 24319371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) { 24326718818eSStefano Zampini Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2433cc48ffa7SToby Isaac 2434cc48ffa7SToby Isaac PetscFunctionBegin; 24359566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 24369566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 24379566063dSJacob Faibussowitsch PetscCall(PetscFree(abt)); 2438cc48ffa7SToby Isaac PetscFunctionReturn(0); 2439cc48ffa7SToby Isaac } 2440cc48ffa7SToby Isaac 24419371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2442baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 24436718818eSStefano Zampini Mat_TransMatMultDense *atb; 2444cb20be35SHong Zhang MPI_Comm comm; 24456718818eSStefano Zampini PetscMPIInt size, *recvcounts; 24466718818eSStefano Zampini PetscScalar *carray, *sendbuf; 2447637a0070SStefano Zampini const PetscScalar *atbarray; 2448a1f9a5e6SStefano Zampini PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2449e68c0b26SHong Zhang const PetscInt *ranges; 2450cb20be35SHong Zhang 2451cb20be35SHong Zhang PetscFunctionBegin; 24526718818eSStefano Zampini MatCheckProduct(C, 3); 24535f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 24546718818eSStefano Zampini atb = (Mat_TransMatMultDense *)C->product->data; 24556718818eSStefano Zampini recvcounts = atb->recvcounts; 24566718818eSStefano Zampini sendbuf = atb->sendbuf; 24576718818eSStefano Zampini 24589566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 24599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2460e68c0b26SHong Zhang 2461c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 24629566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2463cb20be35SHong Zhang 24649566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 2465cb20be35SHong Zhang 2466660d5466SHong Zhang /* arrange atbarray into sendbuf */ 24679566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 24689566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2469637a0070SStefano Zampini for (proc = 0, k = 0; proc < size; proc++) { 2470baa3c1c6SHong Zhang for (j = 0; j < cN; j++) { 2471a1f9a5e6SStefano Zampini for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2472cb20be35SHong Zhang } 2473cb20be35SHong Zhang } 24749566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2475637a0070SStefano Zampini 2476c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 24779566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 24789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 24799566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 24809566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 24819566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2482cb20be35SHong Zhang PetscFunctionReturn(0); 2483cb20be35SHong Zhang } 2484cb20be35SHong Zhang 24859371c9d4SSatish Balay static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2486cb20be35SHong Zhang MPI_Comm comm; 2487baa3c1c6SHong Zhang PetscMPIInt size; 2488660d5466SHong Zhang PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2489baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 24906718818eSStefano Zampini PetscBool cisdense; 24910acaeb71SStefano Zampini PetscInt i; 24920acaeb71SStefano Zampini const PetscInt *ranges; 2493cb20be35SHong Zhang 2494cb20be35SHong Zhang PetscFunctionBegin; 24958f83a4d9SJacob Faibussowitsch MatCheckProduct(C, 4); 24965f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 24979566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2498cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 249998921bdaSJacob Faibussowitsch SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2500cb20be35SHong Zhang } 2501cb20be35SHong Zhang 25024222ddf1SHong Zhang /* create matrix product C */ 25039566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 25049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 250548a46eb9SPierre Jolivet if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 25069566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2507baa3c1c6SHong Zhang 25084222ddf1SHong Zhang /* create data structure for reuse C */ 25099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 25109566063dSJacob Faibussowitsch PetscCall(PetscNew(&atb)); 25114222ddf1SHong Zhang cM = C->rmap->N; 25129566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 25139566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 25140acaeb71SStefano Zampini for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2515baa3c1c6SHong Zhang 25166718818eSStefano Zampini C->product->data = atb; 25176718818eSStefano Zampini C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2518cb20be35SHong Zhang PetscFunctionReturn(0); 2519cb20be35SHong Zhang } 2520cb20be35SHong Zhang 25219371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2522cc48ffa7SToby Isaac MPI_Comm comm; 2523cc48ffa7SToby Isaac PetscMPIInt i, size; 2524cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2525cc48ffa7SToby Isaac PetscMPIInt tag; 25264222ddf1SHong Zhang PetscInt alg; 2527cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 25284222ddf1SHong Zhang Mat_Product *product = C->product; 25294222ddf1SHong Zhang PetscBool flg; 2530cc48ffa7SToby Isaac 2531cc48ffa7SToby Isaac PetscFunctionBegin; 25326718818eSStefano Zampini MatCheckProduct(C, 4); 25335f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 25344222ddf1SHong Zhang /* check local size of A and B */ 25355f80ce2aSJacob Faibussowitsch PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2536cc48ffa7SToby Isaac 25379566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2538637a0070SStefano Zampini alg = flg ? 0 : 1; 2539cc48ffa7SToby Isaac 25404222ddf1SHong Zhang /* setup matrix product C */ 25419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 25429566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATMPIDENSE)); 25439566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 25449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 25454222ddf1SHong Zhang 25464222ddf1SHong Zhang /* create data structure for reuse C */ 25479566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 25489566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 25499566063dSJacob Faibussowitsch PetscCall(PetscNew(&abt)); 2550cc48ffa7SToby Isaac abt->tag = tag; 2551faa55883SToby Isaac abt->alg = alg; 2552faa55883SToby Isaac switch (alg) { 25534222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2554cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2555cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 25569566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2557faa55883SToby Isaac break; 25584222ddf1SHong Zhang default: /* alg: "allgatherv" */ 25599566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 25609566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2561faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2562faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2563faa55883SToby Isaac break; 2564faa55883SToby Isaac } 2565cc48ffa7SToby Isaac 25666718818eSStefano Zampini C->product->data = abt; 25676718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 25684222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2569cc48ffa7SToby Isaac PetscFunctionReturn(0); 2570cc48ffa7SToby Isaac } 2571cc48ffa7SToby Isaac 25729371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) { 2573cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 25746718818eSStefano Zampini Mat_MatTransMultDense *abt; 2575cc48ffa7SToby Isaac MPI_Comm comm; 2576cc48ffa7SToby Isaac PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2577f4259b30SLisandro Dalcin PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2578cc48ffa7SToby Isaac PetscInt i, cK = A->cmap->N, k, j, bn; 2579cc48ffa7SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2580637a0070SStefano Zampini const PetscScalar *av, *bv; 258123fff9afSBarry Smith PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2582cc48ffa7SToby Isaac MPI_Request reqs[2]; 2583cc48ffa7SToby Isaac const PetscInt *ranges; 2584cc48ffa7SToby Isaac 2585cc48ffa7SToby Isaac PetscFunctionBegin; 25866718818eSStefano Zampini MatCheckProduct(C, 3); 25875f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 25886718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 25899566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 25909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 25919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 25929566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 25939566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 25949566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 25959566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 25969566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 25979566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &i)); 25989566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &blda)); 25999566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 26009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 26019566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &ranges)); 2602cc48ffa7SToby Isaac bn = B->rmap->n; 2603637a0070SStefano Zampini if (blda == bn) { 2604637a0070SStefano Zampini sendbuf = (PetscScalar *)bv; 2605cc48ffa7SToby Isaac } else { 2606cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2607cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2608ad540459SPierre Jolivet for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2609cc48ffa7SToby Isaac } 2610cc48ffa7SToby Isaac } 2611cc48ffa7SToby Isaac if (size > 1) { 2612cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2613cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2614cc48ffa7SToby Isaac } else { 2615cc48ffa7SToby Isaac sendto = recvfrom = 0; 2616cc48ffa7SToby Isaac } 26179566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 26189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2619cc48ffa7SToby Isaac recvisfrom = rank; 2620cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2621cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2622cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2623cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2624cc48ffa7SToby Isaac 2625cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2626cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2627cc48ffa7SToby Isaac sendsiz = cK * bn; 2628cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2629cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 26309566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 26319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2632cc48ffa7SToby Isaac } 2633cc48ffa7SToby Isaac 2634cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 26359566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2636792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2637cc48ffa7SToby Isaac 2638cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2639cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 26409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2641cc48ffa7SToby Isaac } 2642cc48ffa7SToby Isaac bn = nextbn; 2643cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2644cc48ffa7SToby Isaac sendbuf = recvbuf; 2645cc48ffa7SToby Isaac } 26469566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 26479566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 26489566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 26499566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 26509566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2651cc48ffa7SToby Isaac PetscFunctionReturn(0); 2652cc48ffa7SToby Isaac } 2653cc48ffa7SToby Isaac 26549371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) { 2655faa55883SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 26566718818eSStefano Zampini Mat_MatTransMultDense *abt; 2657faa55883SToby Isaac MPI_Comm comm; 2658637a0070SStefano Zampini PetscMPIInt size; 2659637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2660637a0070SStefano Zampini const PetscScalar *av, *bv; 2661637a0070SStefano Zampini PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2662faa55883SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2663637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2664faa55883SToby Isaac 2665faa55883SToby Isaac PetscFunctionBegin; 26666718818eSStefano Zampini MatCheckProduct(C, 3); 26675f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 26686718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 26699566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 26709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 26719566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 26729566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 26739566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 26749566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 26759566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 26769566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &blda)); 26779566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 26789566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 2679faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2680faa55883SToby Isaac bn = B->rmap->n; 2681faa55883SToby Isaac sendbuf = abt->buf[0]; 2682faa55883SToby Isaac recvbuf = abt->buf[1]; 2683faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2684ad540459SPierre Jolivet for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2685faa55883SToby Isaac } 26869566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 26879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 26889566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 26899566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 26909566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2691792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 26929566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 26939566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 26949566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 26959566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 26969566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2697faa55883SToby Isaac PetscFunctionReturn(0); 2698faa55883SToby Isaac } 2699faa55883SToby Isaac 27009371c9d4SSatish Balay static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 27016718818eSStefano Zampini Mat_MatTransMultDense *abt; 2702faa55883SToby Isaac 2703faa55883SToby Isaac PetscFunctionBegin; 27046718818eSStefano Zampini MatCheckProduct(C, 3); 27055f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 27066718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 2707faa55883SToby Isaac switch (abt->alg) { 27089371c9d4SSatish Balay case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break; 27099371c9d4SSatish Balay default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break; 2710faa55883SToby Isaac } 2711faa55883SToby Isaac PetscFunctionReturn(0); 2712faa55883SToby Isaac } 2713faa55883SToby Isaac 27149371c9d4SSatish Balay PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) { 27156718818eSStefano Zampini Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2716320f2790SHong Zhang 2717320f2790SHong Zhang PetscFunctionBegin; 27189566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ce)); 27199566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ae)); 27209566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Be)); 27219566063dSJacob Faibussowitsch PetscCall(PetscFree(ab)); 2722320f2790SHong Zhang PetscFunctionReturn(0); 2723320f2790SHong Zhang } 2724320f2790SHong Zhang 27255242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 27269371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 27276718818eSStefano Zampini Mat_MatMultDense *ab; 2728320f2790SHong Zhang 2729320f2790SHong Zhang PetscFunctionBegin; 27306718818eSStefano Zampini MatCheckProduct(C, 3); 27315f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 27326718818eSStefano Zampini ab = (Mat_MatMultDense *)C->product->data; 27339566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 27349566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 27359566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 27369566063dSJacob Faibussowitsch PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2737320f2790SHong Zhang PetscFunctionReturn(0); 2738320f2790SHong Zhang } 2739320f2790SHong Zhang 27409371c9d4SSatish Balay static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2741320f2790SHong Zhang Mat Ae, Be, Ce; 2742320f2790SHong Zhang Mat_MatMultDense *ab; 2743320f2790SHong Zhang 2744320f2790SHong Zhang PetscFunctionBegin; 27456718818eSStefano Zampini MatCheckProduct(C, 4); 27465f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 27474222ddf1SHong Zhang /* check local size of A and B */ 2748320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 274998921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2750320f2790SHong Zhang } 2751320f2790SHong Zhang 27524222ddf1SHong Zhang /* create elemental matrices Ae and Be */ 27539566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 27549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 27559566063dSJacob Faibussowitsch PetscCall(MatSetType(Ae, MATELEMENTAL)); 27569566063dSJacob Faibussowitsch PetscCall(MatSetUp(Ae)); 27579566063dSJacob Faibussowitsch PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2758320f2790SHong Zhang 27599566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 27609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 27619566063dSJacob Faibussowitsch PetscCall(MatSetType(Be, MATELEMENTAL)); 27629566063dSJacob Faibussowitsch PetscCall(MatSetUp(Be)); 27639566063dSJacob Faibussowitsch PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2764320f2790SHong Zhang 27654222ddf1SHong Zhang /* compute symbolic Ce = Ae*Be */ 27669566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 27679566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 27684222ddf1SHong Zhang 27694222ddf1SHong Zhang /* setup C */ 27709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 27719566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATDENSE)); 27729566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2773320f2790SHong Zhang 2774320f2790SHong Zhang /* create data structure for reuse Cdense */ 27759566063dSJacob Faibussowitsch PetscCall(PetscNew(&ab)); 2776320f2790SHong Zhang ab->Ae = Ae; 2777320f2790SHong Zhang ab->Be = Be; 2778320f2790SHong Zhang ab->Ce = Ce; 27796718818eSStefano Zampini 27806718818eSStefano Zampini C->product->data = ab; 27816718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 27824222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2783320f2790SHong Zhang PetscFunctionReturn(0); 2784320f2790SHong Zhang } 27854222ddf1SHong Zhang #endif 27864222ddf1SHong Zhang /* ----------------------------------------------- */ 27874222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 27889371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) { 2789320f2790SHong Zhang PetscFunctionBegin; 27904222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 27914222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 2792320f2790SHong Zhang PetscFunctionReturn(0); 2793320f2790SHong Zhang } 27945242a7b1SHong Zhang #endif 279586aefd0dSHong Zhang 27969371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) { 27974222ddf1SHong Zhang Mat_Product *product = C->product; 27984222ddf1SHong Zhang Mat A = product->A, B = product->B; 27994222ddf1SHong Zhang 28004222ddf1SHong Zhang PetscFunctionBegin; 28014222ddf1SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 280298921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 28036718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 28046718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_AtB; 28054222ddf1SHong Zhang PetscFunctionReturn(0); 28064222ddf1SHong Zhang } 28074222ddf1SHong Zhang 28089371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) { 28094222ddf1SHong Zhang Mat_Product *product = C->product; 28104222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv", "cyclic"}; 28114222ddf1SHong Zhang PetscInt alg, nalg = 2; 28124222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28134222ddf1SHong Zhang 28144222ddf1SHong Zhang PetscFunctionBegin; 28154222ddf1SHong Zhang /* Set default algorithm */ 28164222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 28179566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 281848a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 28194222ddf1SHong Zhang 28204222ddf1SHong Zhang /* Get runtime option */ 28214222ddf1SHong Zhang if (product->api_user) { 2822d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 28239566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2824d0609cedSBarry Smith PetscOptionsEnd(); 28254222ddf1SHong Zhang } else { 2826d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 28279566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2828d0609cedSBarry Smith PetscOptionsEnd(); 28294222ddf1SHong Zhang } 283048a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 28314222ddf1SHong Zhang 28324222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 28334222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 28344222ddf1SHong Zhang PetscFunctionReturn(0); 28354222ddf1SHong Zhang } 28364222ddf1SHong Zhang 28379371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) { 28384222ddf1SHong Zhang Mat_Product *product = C->product; 28394222ddf1SHong Zhang 28404222ddf1SHong Zhang PetscFunctionBegin; 28414222ddf1SHong Zhang switch (product->type) { 28424222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 28439371c9d4SSatish Balay case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break; 28444222ddf1SHong Zhang #endif 28459371c9d4SSatish Balay case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break; 28469371c9d4SSatish Balay case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break; 28479371c9d4SSatish Balay default: break; 28484222ddf1SHong Zhang } 28494222ddf1SHong Zhang PetscFunctionReturn(0); 28504222ddf1SHong Zhang } 2851