1ed3cc1f0SBarry Smith /* 2ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 347d993e7Ssuyashtn Portions of this code are under: 447d993e7Ssuyashtn Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5ed3cc1f0SBarry Smith */ 6ed3cc1f0SBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 9baa3c1c6SHong Zhang #include <petscblaslapack.h> 108965ea79SLois Curfman McInnes 11ab92ecdeSBarry Smith /*@ 1211a5261eSBarry Smith MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 162ef1f0ffSBarry Smith . A - the sequential or MPI `MATDENSE` matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 231cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24ab92ecdeSBarry Smith @*/ 25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26d71ae5a4SJacob Faibussowitsch { 27ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31d5ea218eSStefano Zampini PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 324f572ea9SToby Isaac PetscAssertPointer(B, 2); 339566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 342205254eSKarl Rupp if (flg) *B = mat->A; 35f140bc17SStefano Zampini else { 369566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 3728b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38f140bc17SStefano Zampini *B = A; 39f140bc17SStefano Zampini } 403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41ab92ecdeSBarry Smith } 42ab92ecdeSBarry Smith 4366976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44d71ae5a4SJacob Faibussowitsch { 45a76f77c3SStefano Zampini Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46a76f77c3SStefano Zampini Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47a76f77c3SStefano Zampini 48a76f77c3SStefano Zampini PetscFunctionBegin; 499566063dSJacob Faibussowitsch PetscCall(MatCopy(Amat->A, Bmat->A, s)); 503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51a76f77c3SStefano Zampini } 52a76f77c3SStefano Zampini 53d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54d71ae5a4SJacob Faibussowitsch { 552f605a99SJose E. Roman Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 562f605a99SJose E. Roman PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 572f605a99SJose E. Roman PetscScalar *v; 582f605a99SJose E. Roman 592f605a99SJose E. Roman PetscFunctionBegin; 602f605a99SJose E. Roman PetscCall(MatDenseGetArray(mat->A, &v)); 612f605a99SJose E. Roman PetscCall(MatDenseGetLDA(mat->A, &lda)); 622f605a99SJose E. Roman rend2 = PetscMin(rend, A->cmap->N); 632f605a99SJose E. Roman if (rend2 > rstart) { 642f605a99SJose E. Roman for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 652f605a99SJose E. Roman PetscCall(PetscLogFlops(rend2 - rstart)); 662f605a99SJose E. Roman } 672f605a99SJose E. Roman PetscCall(MatDenseRestoreArray(mat->A, &v)); 683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 692f605a99SJose E. Roman } 702f605a99SJose E. Roman 7166976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72d71ae5a4SJacob Faibussowitsch { 73ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74d0f46423SBarry Smith PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75ba8c8a56SBarry Smith 76ba8c8a56SBarry Smith PetscFunctionBegin; 77aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78ba8c8a56SBarry Smith lrow = row - rstart; 799566063dSJacob Faibussowitsch PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 81ba8c8a56SBarry Smith } 82ba8c8a56SBarry Smith 8366976f2fSJacob Faibussowitsch static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84d71ae5a4SJacob Faibussowitsch { 85637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86637a0070SStefano Zampini PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87ba8c8a56SBarry Smith 88ba8c8a56SBarry Smith PetscFunctionBegin; 89aed4548fSBarry Smith PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90637a0070SStefano Zampini lrow = row - rstart; 919566063dSJacob Faibussowitsch PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 93ba8c8a56SBarry Smith } 94ba8c8a56SBarry Smith 9566976f2fSJacob Faibussowitsch static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96d71ae5a4SJacob Faibussowitsch { 970de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98d0f46423SBarry Smith PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 9987828ca2SBarry Smith PetscScalar *array; 1000de54da6SSatish Balay MPI_Comm comm; 1014b6ae80fSStefano Zampini PetscBool flg; 10211bd1e4dSLisandro Dalcin Mat B; 1030de54da6SSatish Balay 1040de54da6SSatish Balay PetscFunctionBegin; 1059566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &flg)); 10628b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 1079566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108616b8fbbSStefano Zampini if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA) 1109566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 11128b400f6SJacob Faibussowitsch PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP) 11347d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 11447d993e7Ssuyashtn PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 11547d993e7Ssuyashtn #endif 116f4f49eeaSPierre Jolivet PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 1179566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &B)); 1189566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, m, m, m, m)); 1199566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 1209566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 1219566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 1229566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 1239566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 12411bd1e4dSLisandro Dalcin *a = B; 1259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1262205254eSKarl Rupp } else *a = B; 1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1280de54da6SSatish Balay } 1290de54da6SSatish Balay 13066976f2fSJacob Faibussowitsch static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131d71ae5a4SJacob Faibussowitsch { 13239b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1358965ea79SLois Curfman McInnes 1363a40ed3dSBarry Smith PetscFunctionBegin; 1378965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 1385ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 13908401ef6SPierre Jolivet PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 1408965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1418965ea79SLois Curfman McInnes row = idxm[i] - rstart; 14239b7565bSBarry Smith if (roworiented) { 143a80ceddfSStefano Zampini PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv)); 1443a40ed3dSBarry Smith } else { 1458965ea79SLois Curfman McInnes for (j = 0; j < n; j++) { 1465ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 14708401ef6SPierre Jolivet PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 148a80ceddfSStefano Zampini PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv)); 14939b7565bSBarry Smith } 1508965ea79SLois Curfman McInnes } 1512205254eSKarl Rupp } else if (!A->donotstash) { 1525080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 15339b7565bSBarry Smith if (roworiented) { 154a80ceddfSStefano Zampini PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE)); 155d36fbae8SSatish Balay } else { 156a80ceddfSStefano Zampini PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE)); 15739b7565bSBarry Smith } 158b49de8d1SLois Curfman McInnes } 159b49de8d1SLois Curfman McInnes } 1603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 161b49de8d1SLois Curfman McInnes } 162b49de8d1SLois Curfman McInnes 16366976f2fSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164d71ae5a4SJacob Faibussowitsch { 165b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166d0f46423SBarry Smith PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167b49de8d1SLois Curfman McInnes 1683a40ed3dSBarry Smith PetscFunctionBegin; 169b49de8d1SLois Curfman McInnes for (i = 0; i < m; i++) { 17054c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 17154c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 173b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 174b49de8d1SLois Curfman McInnes for (j = 0; j < n; j++) { 17554c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 17654c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 1779566063dSJacob Faibussowitsch PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178b49de8d1SLois Curfman McInnes } 179e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 1808965ea79SLois Curfman McInnes } 1813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1828965ea79SLois Curfman McInnes } 1838965ea79SLois Curfman McInnes 184d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185d71ae5a4SJacob Faibussowitsch { 18649a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 18749a6ff4bSBarry Smith 18849a6ff4bSBarry Smith PetscFunctionBegin; 1899566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, lda)); 1903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19149a6ff4bSBarry Smith } 19249a6ff4bSBarry Smith 193d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194d71ae5a4SJacob Faibussowitsch { 195ad16ce7aSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 19647d993e7Ssuyashtn MatType mtype = MATSEQDENSE; 197ad16ce7aSStefano Zampini 198ad16ce7aSStefano Zampini PetscFunctionBegin; 19917359960SJose E. Roman if (!a->A) { 20028b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2019566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 2029566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 2039566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 2049566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA) 20647d993e7Ssuyashtn PetscBool iscuda; 2079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 20847d993e7Ssuyashtn if (iscuda) mtype = MATSEQDENSECUDA; 209a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP) 21047d993e7Ssuyashtn PetscBool iship; 21147d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 21247d993e7Ssuyashtn if (iship) mtype = MATSEQDENSEHIP; 21347d993e7Ssuyashtn #endif 21447d993e7Ssuyashtn PetscCall(MatSetType(a->A, mtype)); 21517359960SJose E. Roman } 2169566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(a->A, lda)); 2173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 218ad16ce7aSStefano Zampini } 219ad16ce7aSStefano Zampini 220d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221d71ae5a4SJacob Faibussowitsch { 222ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223ff14e315SSatish Balay 2243a40ed3dSBarry Smith PetscFunctionBegin; 22528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2269566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, array)); 2273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 228ff14e315SSatish Balay } 229ff14e315SSatish Balay 230f1e99121SPierre Jolivet static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 231d71ae5a4SJacob Faibussowitsch { 2328572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2338572280aSBarry Smith 2348572280aSBarry Smith PetscFunctionBegin; 23528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236f1e99121SPierre Jolivet PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 2373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2388572280aSBarry Smith } 2398572280aSBarry Smith 240d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241d71ae5a4SJacob Faibussowitsch { 2426947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 2436947451fSStefano Zampini 2446947451fSStefano Zampini PetscFunctionBegin; 24528b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2469566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, array)); 2473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2486947451fSStefano Zampini } 2496947451fSStefano Zampini 250d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 251d71ae5a4SJacob Faibussowitsch { 252d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253d3042a70SBarry Smith 254d3042a70SBarry Smith PetscFunctionBegin; 25528b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 25628b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2579566063dSJacob Faibussowitsch PetscCall(MatDensePlaceArray(a->A, array)); 2583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 259d3042a70SBarry Smith } 260d3042a70SBarry Smith 261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262d71ae5a4SJacob Faibussowitsch { 263d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264d3042a70SBarry Smith 265d3042a70SBarry Smith PetscFunctionBegin; 26628b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 26728b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2689566063dSJacob Faibussowitsch PetscCall(MatDenseResetArray(a->A)); 2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 270d3042a70SBarry Smith } 271d3042a70SBarry Smith 272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273d71ae5a4SJacob Faibussowitsch { 274d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275d5ea218eSStefano Zampini 276d5ea218eSStefano Zampini PetscFunctionBegin; 27728b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 27828b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 2799566063dSJacob Faibussowitsch PetscCall(MatDenseReplaceArray(a->A, array)); 2803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 281d5ea218eSStefano Zampini } 282d5ea218eSStefano Zampini 283d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284d71ae5a4SJacob Faibussowitsch { 285ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286637a0070SStefano Zampini PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 2875d0c19d7SBarry Smith const PetscInt *irow, *icol; 288637a0070SStefano Zampini const PetscScalar *v; 289637a0070SStefano Zampini PetscScalar *bv; 290ca3fa75bSLois Curfman McInnes Mat newmat; 2914aa3045dSJed Brown IS iscol_local; 29242a884f0SBarry Smith MPI_Comm comm_is, comm_mat; 293ca3fa75bSLois Curfman McInnes 294ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 2959566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 2969566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 29708401ef6SPierre Jolivet PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 29842a884f0SBarry Smith 2999566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 3009566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &irow)); 3019566063dSJacob Faibussowitsch PetscCall(ISGetIndices(iscol_local, &icol)); 3029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isrow, &nrows)); 3039566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &ncols)); 3049566063dSJacob Faibussowitsch PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305ca3fa75bSLois Curfman McInnes 306ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 307da81f932SPierre Jolivet to confirm that it is OK. ... Currently supports only submatrix same partitioning as 3087eba5e9cSLois Curfman McInnes original matrix! */ 309ca3fa75bSLois Curfman McInnes 3109566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 3119566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312ca3fa75bSLois Curfman McInnes 313ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 314ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 315e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 3167eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 317ca3fa75bSLois Curfman McInnes newmat = *B; 318ca3fa75bSLois Curfman McInnes } else { 319ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 3209566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 3219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 3229566063dSJacob Faibussowitsch PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 3239566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324ca3fa75bSLois Curfman McInnes } 325ca3fa75bSLois Curfman McInnes 326ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 327ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense *)newmat->data; 3289566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(newmatd->A, &bv)); 3299566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mat->A, &v)); 3309566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mat->A, &lda)); 3314aa3045dSJed Brown for (i = 0; i < Ncols; i++) { 332637a0070SStefano Zampini const PetscScalar *av = v + lda * icol[i]; 333ad540459SPierre Jolivet for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334ca3fa75bSLois Curfman McInnes } 3359566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 3369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337ca3fa75bSLois Curfman McInnes 338ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 3399566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 3409566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341ca3fa75bSLois Curfman McInnes 342ca3fa75bSLois Curfman McInnes /* Free work space */ 3439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &irow)); 3449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol_local, &icol)); 3459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 346ca3fa75bSLois Curfman McInnes *B = newmat; 3473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 348ca3fa75bSLois Curfman McInnes } 349ca3fa75bSLois Curfman McInnes 35066976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351d71ae5a4SJacob Faibussowitsch { 35273a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 35373a71a0fSBarry Smith 3543a40ed3dSBarry Smith PetscFunctionBegin; 3559566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, array)); 3563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 357ff14e315SSatish Balay } 358ff14e315SSatish Balay 359f1e99121SPierre Jolivet static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 360d71ae5a4SJacob Faibussowitsch { 3618572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3628572280aSBarry Smith 3638572280aSBarry Smith PetscFunctionBegin; 364f1e99121SPierre Jolivet PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 3653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3668572280aSBarry Smith } 3678572280aSBarry Smith 36866976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369d71ae5a4SJacob Faibussowitsch { 3706947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 3716947451fSStefano Zampini 3726947451fSStefano Zampini PetscFunctionBegin; 3739566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 3743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3756947451fSStefano Zampini } 3766947451fSStefano Zampini 37766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378d71ae5a4SJacob Faibussowitsch { 37939ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 38013f74950SBarry Smith PetscInt nstash, reallocs; 3818965ea79SLois Curfman McInnes 3823a40ed3dSBarry Smith PetscFunctionBegin; 3833ba16761SJacob Faibussowitsch if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 3848965ea79SLois Curfman McInnes 3859566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 3869566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 3879566063dSJacob Faibussowitsch PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 3883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3898965ea79SLois Curfman McInnes } 3908965ea79SLois Curfman McInnes 39166976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392d71ae5a4SJacob Faibussowitsch { 39339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 39413f74950SBarry Smith PetscInt i, *row, *col, flg, j, rstart, ncols; 39513f74950SBarry Smith PetscMPIInt n; 39687828ca2SBarry Smith PetscScalar *val; 3978965ea79SLois Curfman McInnes 3983a40ed3dSBarry Smith PetscFunctionBegin; 399910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 4008965ea79SLois Curfman McInnes /* wait on receives */ 4017ef1d9bdSSatish Balay while (1) { 4029566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 4037ef1d9bdSSatish Balay if (!flg) break; 4048965ea79SLois Curfman McInnes 4057ef1d9bdSSatish Balay for (i = 0; i < n;) { 4067ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 4072205254eSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 4082205254eSKarl Rupp if (row[j] != rstart) break; 4092205254eSKarl Rupp } 4107ef1d9bdSSatish Balay if (j < n) ncols = j - i; 4117ef1d9bdSSatish Balay else ncols = n - i; 4127ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 4139566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 4147ef1d9bdSSatish Balay i = j; 4158965ea79SLois Curfman McInnes } 4167ef1d9bdSSatish Balay } 4179566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418910cf402Sprj- } 4198965ea79SLois Curfman McInnes 4209566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mdn->A, mode)); 4219566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mdn->A, mode)); 4223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4238965ea79SLois Curfman McInnes } 4248965ea79SLois Curfman McInnes 42566976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426d71ae5a4SJacob Faibussowitsch { 42739ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 4283a40ed3dSBarry Smith 4293a40ed3dSBarry Smith PetscFunctionBegin; 4309566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 4313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4328965ea79SLois Curfman McInnes } 4338965ea79SLois Curfman McInnes 43466976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435d71ae5a4SJacob Faibussowitsch { 43639ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437637a0070SStefano Zampini PetscInt i, len, *lrows; 438637a0070SStefano Zampini 439637a0070SStefano Zampini PetscFunctionBegin; 440637a0070SStefano Zampini /* get locally owned rows */ 4419566063dSJacob Faibussowitsch PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442dd8e379bSPierre Jolivet /* fix right-hand side if needed */ 443637a0070SStefano Zampini if (x && b) { 44497b48c8fSBarry Smith const PetscScalar *xx; 44597b48c8fSBarry Smith PetscScalar *bb; 4468965ea79SLois Curfman McInnes 4479566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 4489566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(b, &bb)); 449637a0070SStefano Zampini for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 4509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 4519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(b, &bb)); 45297b48c8fSBarry Smith } 4539566063dSJacob Faibussowitsch PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454e2eb51b1SBarry Smith if (diag != 0.0) { 455637a0070SStefano Zampini Vec d; 456b9679d65SBarry Smith 4579566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, NULL, &d)); 4589566063dSJacob Faibussowitsch PetscCall(VecSet(d, diag)); 4599566063dSJacob Faibussowitsch PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 4609566063dSJacob Faibussowitsch PetscCall(VecDestroy(&d)); 461b9679d65SBarry Smith } 4629566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 4633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4648965ea79SLois Curfman McInnes } 4658965ea79SLois Curfman McInnes 466*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 467*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 468*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 469*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 470*0be0d8bdSHansol Suh 47166976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 472d71ae5a4SJacob Faibussowitsch { 47339ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 474637a0070SStefano Zampini const PetscScalar *ax; 475637a0070SStefano Zampini PetscScalar *ay; 476d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 477c456f294SBarry Smith 4783a40ed3dSBarry Smith PetscFunctionBegin; 4796f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 4809566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 4819566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 4829566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 4839566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 4849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 4859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 4869566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 4873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4888965ea79SLois Curfman McInnes } 4898965ea79SLois Curfman McInnes 490*0be0d8bdSHansol Suh static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 491*0be0d8bdSHansol Suh { 492*0be0d8bdSHansol Suh Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 493*0be0d8bdSHansol Suh const PetscScalar *ax; 494*0be0d8bdSHansol Suh PetscScalar *ay; 495*0be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 496*0be0d8bdSHansol Suh 497*0be0d8bdSHansol Suh PetscFunctionBegin; 498*0be0d8bdSHansol Suh if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 499*0be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 500*0be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 501*0be0d8bdSHansol Suh PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 502*0be0d8bdSHansol Suh PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 503*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 504*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 505*0be0d8bdSHansol Suh PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 506*0be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 507*0be0d8bdSHansol Suh } 508*0be0d8bdSHansol Suh 50966976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 510d71ae5a4SJacob Faibussowitsch { 51139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 512637a0070SStefano Zampini const PetscScalar *ax; 513637a0070SStefano Zampini PetscScalar *ay; 514d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 515c456f294SBarry Smith 5163a40ed3dSBarry Smith PetscFunctionBegin; 5176f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 5189566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 5199566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 5209566063dSJacob Faibussowitsch PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 5219566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 5229566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 5239566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 5249566063dSJacob Faibussowitsch PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 5253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 5268965ea79SLois Curfman McInnes } 5278965ea79SLois Curfman McInnes 528*0be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 529*0be0d8bdSHansol Suh { 530*0be0d8bdSHansol Suh Mat_MPIDense *a = (Mat_MPIDense *)A->data; 531*0be0d8bdSHansol Suh const PetscScalar *ax; 532*0be0d8bdSHansol Suh PetscScalar *ay; 533*0be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 534*0be0d8bdSHansol Suh 535*0be0d8bdSHansol Suh PetscFunctionBegin; 536*0be0d8bdSHansol Suh if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 537*0be0d8bdSHansol Suh PetscCall(VecSet(yy, 0.0)); 538*0be0d8bdSHansol Suh PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 539*0be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 540*0be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 541*0be0d8bdSHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 542*0be0d8bdSHansol Suh PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 543*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 544*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 545*0be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 546*0be0d8bdSHansol Suh } 547*0be0d8bdSHansol Suh 548459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 549d71ae5a4SJacob Faibussowitsch { 550096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 551637a0070SStefano Zampini const PetscScalar *ax; 552637a0070SStefano Zampini PetscScalar *ay; 553d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 5544d86920dSPierre Jolivet 5553a40ed3dSBarry Smith PetscFunctionBegin; 5566f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 5579566063dSJacob Faibussowitsch PetscCall(VecSet(yy, 0.0)); 558459e8d23SBlanca Mellado Pinto if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 559459e8d23SBlanca Mellado Pinto else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 5609566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 5619566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 5629566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 5639566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 5649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 5659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 5663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 567096963f5SLois Curfman McInnes } 568096963f5SLois Curfman McInnes 569*0be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 570*0be0d8bdSHansol Suh { 571*0be0d8bdSHansol Suh Mat_MPIDense *a = (Mat_MPIDense *)A->data; 572*0be0d8bdSHansol Suh const PetscScalar *ax; 573*0be0d8bdSHansol Suh PetscScalar *ay; 574*0be0d8bdSHansol Suh PetscMemType axmtype, aymtype; 575*0be0d8bdSHansol Suh 576*0be0d8bdSHansol Suh PetscFunctionBegin; 577*0be0d8bdSHansol Suh if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 578*0be0d8bdSHansol Suh PetscCall(VecCopy(yy, zz)); 579*0be0d8bdSHansol Suh PetscMPIInt rank; 580*0be0d8bdSHansol Suh PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 581*0be0d8bdSHansol Suh PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 582*0be0d8bdSHansol Suh PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 583*0be0d8bdSHansol Suh PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 584*0be0d8bdSHansol Suh PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 585*0be0d8bdSHansol Suh PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 586*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 587*0be0d8bdSHansol Suh PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 588*0be0d8bdSHansol Suh PetscFunctionReturn(PETSC_SUCCESS); 589*0be0d8bdSHansol Suh } 590*0be0d8bdSHansol Suh 591459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 592d71ae5a4SJacob Faibussowitsch { 593096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 594637a0070SStefano Zampini const PetscScalar *ax; 595637a0070SStefano Zampini PetscScalar *ay; 596d0295fc0SJunchao Zhang PetscMemType axmtype, aymtype; 597096963f5SLois Curfman McInnes 5983a40ed3dSBarry Smith PetscFunctionBegin; 5996f08ad05SPierre Jolivet if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 6009566063dSJacob Faibussowitsch PetscCall(VecCopy(yy, zz)); 601459e8d23SBlanca Mellado Pinto if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 602459e8d23SBlanca Mellado Pinto else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 6039566063dSJacob Faibussowitsch PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 6049566063dSJacob Faibussowitsch PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 6059566063dSJacob Faibussowitsch PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 6069566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 6079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 6089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 6093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 610096963f5SLois Curfman McInnes } 611096963f5SLois Curfman McInnes 612459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 613459e8d23SBlanca Mellado Pinto { 614459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 615459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 616459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 617459e8d23SBlanca Mellado Pinto } 618459e8d23SBlanca Mellado Pinto 619459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 620459e8d23SBlanca Mellado Pinto { 621459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 622459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 623459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 624459e8d23SBlanca Mellado Pinto } 625459e8d23SBlanca Mellado Pinto 626459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 627459e8d23SBlanca Mellado Pinto { 628459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 629459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 630459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 631459e8d23SBlanca Mellado Pinto } 632459e8d23SBlanca Mellado Pinto 633459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 634459e8d23SBlanca Mellado Pinto { 635459e8d23SBlanca Mellado Pinto PetscFunctionBegin; 636459e8d23SBlanca Mellado Pinto PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 637459e8d23SBlanca Mellado Pinto PetscFunctionReturn(PETSC_SUCCESS); 638459e8d23SBlanca Mellado Pinto } 639459e8d23SBlanca Mellado Pinto 640d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 641d71ae5a4SJacob Faibussowitsch { 64239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 64379560b15SJacob Faibussowitsch PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 64479560b15SJacob Faibussowitsch PetscScalar *x; 645637a0070SStefano Zampini const PetscScalar *av; 646ed3cc1f0SBarry Smith 6473a40ed3dSBarry Smith PetscFunctionBegin; 6489566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x)); 64979560b15SJacob Faibussowitsch PetscCall(VecGetSize(v, &ng)); 65079560b15SJacob Faibussowitsch PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 65179560b15SJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &nl)); 652d0f46423SBarry Smith len = PetscMin(a->A->rmap->n, a->A->cmap->n); 653d0f46423SBarry Smith radd = A->rmap->rstart * m; 6549566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 6559566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 656ad540459SPierre Jolivet for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 6579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 658*0be0d8bdSHansol Suh if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 6599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x)); 6603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6618965ea79SLois Curfman McInnes } 6628965ea79SLois Curfman McInnes 66366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIDense(Mat mat) 664d71ae5a4SJacob Faibussowitsch { 6653501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 666ed3cc1f0SBarry Smith 6673a40ed3dSBarry Smith PetscFunctionBegin; 6683ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 6699566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 67028b400f6SJacob Faibussowitsch PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 67128b400f6SJacob Faibussowitsch PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 6729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->A)); 6739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->lvec)); 6749566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&mdn->Mvctx)); 6759566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mdn->cvec)); 6769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mdn->cmat)); 67701b82886SBarry Smith 6789566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 6799566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 6808baccfbdSHong Zhang 6819566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 6829566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 6839566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 6849566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 6859566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 6869566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 6879566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 6889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 6899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 6909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 6919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 6929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 6939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 6948baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 6959566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 6968baccfbdSHong Zhang #endif 697d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 6989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 699d24d4204SJose E. Roman #endif 7009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 7019566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 7029566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 7036718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 7049566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 7059566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 7062e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 7072e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 7082e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 7092e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 7102e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 7112e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 7122e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 7132e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 7142e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 7152e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 7162e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 7172e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 7182e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 7192e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 7202e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 7213d9668e3SJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 7226718818eSStefano Zampini #endif 72347d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 72447d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 72547d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 72647d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 72747d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 72847d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 72947d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 73047d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 73147d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 73247d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 73347d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 73447d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 73547d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 73647d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 73747d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 73847d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 73947d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 74047d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 7413d9668e3SJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 74247d993e7Ssuyashtn #endif 7439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 7449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 7459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 7469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 7479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 7489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 7499566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 7509566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 7519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 7529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 753*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 754*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 755*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 756f8103e6bSStefano Zampini 757f8103e6bSStefano Zampini PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 7583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7598965ea79SLois Curfman McInnes } 76039ddd567SLois Curfman McInnes 7619804daf3SBarry Smith #include <petscdraw.h> 762d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 763d71ae5a4SJacob Faibussowitsch { 76439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 765616b8fbbSStefano Zampini PetscMPIInt rank; 76619fd82e9SBarry Smith PetscViewerType vtype; 767ace3abfcSBarry Smith PetscBool iascii, isdraw; 768b0a32e0cSBarry Smith PetscViewer sviewer; 769f3ef73ceSBarry Smith PetscViewerFormat format; 7708965ea79SLois Curfman McInnes 7713a40ed3dSBarry Smith PetscFunctionBegin; 7729566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 7749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 77532077d6dSBarry Smith if (iascii) { 7769566063dSJacob Faibussowitsch PetscCall(PetscViewerGetType(viewer, &vtype)); 7779566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 778456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 7794e220ebcSLois Curfman McInnes MatInfo info; 7809566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 7819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 7829371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 7839371c9d4SSatish Balay (PetscInt)info.memory)); 7849566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 7859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 7866f08ad05SPierre Jolivet if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 7873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 788fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 7893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7908965ea79SLois Curfman McInnes } 791f1af5d2fSBarry Smith } else if (isdraw) { 792b0a32e0cSBarry Smith PetscDraw draw; 793ace3abfcSBarry Smith PetscBool isnull; 794f1af5d2fSBarry Smith 7959566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 7969566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 7973ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 798f1af5d2fSBarry Smith } 79977ed5343SBarry Smith 8007da1fb6eSBarry Smith { 8018965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 8028965ea79SLois Curfman McInnes Mat A; 803d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 804ba8c8a56SBarry Smith PetscInt *cols; 805ba8c8a56SBarry Smith PetscScalar *vals; 8068965ea79SLois Curfman McInnes 8079566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 808dd400576SPatrick Sanan if (rank == 0) { 8099566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 8103a40ed3dSBarry Smith } else { 8119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 8128965ea79SLois Curfman McInnes } 8137adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 8149566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPIDENSE)); 8159566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 8168965ea79SLois Curfman McInnes 81739ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 81839ddd567SLois Curfman McInnes but it's quick for now */ 81951022da4SBarry Smith A->insertmode = INSERT_VALUES; 8202205254eSKarl Rupp 8212205254eSKarl Rupp row = mat->rmap->rstart; 8222205254eSKarl Rupp m = mdn->A->rmap->n; 8238965ea79SLois Curfman McInnes for (i = 0; i < m; i++) { 8249566063dSJacob Faibussowitsch PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 8259566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 8269566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 82739ddd567SLois Curfman McInnes row++; 8288965ea79SLois Curfman McInnes } 8298965ea79SLois Curfman McInnes 8309566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 8319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 8329566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 833dd400576SPatrick Sanan if (rank == 0) { 834f4f49eeaSPierre Jolivet PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 835f4f49eeaSPierre Jolivet PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 8368965ea79SLois Curfman McInnes } 8379566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 8389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 8398965ea79SLois Curfman McInnes } 8403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8418965ea79SLois Curfman McInnes } 8428965ea79SLois Curfman McInnes 84366976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 844d71ae5a4SJacob Faibussowitsch { 845ace3abfcSBarry Smith PetscBool iascii, isbinary, isdraw, issocket; 8468965ea79SLois Curfman McInnes 847433994e6SBarry Smith PetscFunctionBegin; 8489566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 8499566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 8509566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 8519566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 8520f5bd95cSBarry Smith 85332077d6dSBarry Smith if (iascii || issocket || isdraw) { 8549566063dSJacob Faibussowitsch PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 8551baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 8563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 8578965ea79SLois Curfman McInnes } 8588965ea79SLois Curfman McInnes 85966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 860d71ae5a4SJacob Faibussowitsch { 8613501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 8623501a2bdSLois Curfman McInnes Mat mdn = mat->A; 8633966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 8648965ea79SLois Curfman McInnes 8653a40ed3dSBarry Smith PetscFunctionBegin; 8664e220ebcSLois Curfman McInnes info->block_size = 1.0; 8672205254eSKarl Rupp 8689566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 8692205254eSKarl Rupp 8709371c9d4SSatish Balay isend[0] = info->nz_used; 8719371c9d4SSatish Balay isend[1] = info->nz_allocated; 8729371c9d4SSatish Balay isend[2] = info->nz_unneeded; 8739371c9d4SSatish Balay isend[3] = info->memory; 8749371c9d4SSatish Balay isend[4] = info->mallocs; 8758965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 8764e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 8774e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 8784e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 8794e220ebcSLois Curfman McInnes info->memory = isend[3]; 8804e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 8818965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 8821c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 8832205254eSKarl Rupp 8844e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8854e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8864e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8874e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8884e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8898965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 8901c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 8912205254eSKarl Rupp 8924e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8934e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8944e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8954e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8964e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8978965ea79SLois Curfman McInnes } 8984e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8994e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9004e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9028965ea79SLois Curfman McInnes } 9038965ea79SLois Curfman McInnes 90466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 905d71ae5a4SJacob Faibussowitsch { 90639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 9078965ea79SLois Curfman McInnes 9083a40ed3dSBarry Smith PetscFunctionBegin; 90912c028f9SKris Buschelman switch (op) { 910512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 91112c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 91212c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 91343674050SBarry Smith MatCheckPreallocated(A, 1); 9149566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 91512c028f9SKris Buschelman break; 91612c028f9SKris Buschelman case MAT_ROW_ORIENTED: 91743674050SBarry Smith MatCheckPreallocated(A, 1); 9184e0d8c25SBarry Smith a->roworiented = flg; 9199566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 92012c028f9SKris Buschelman break; 9218c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 92213fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 92312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 924d71ae5a4SJacob Faibussowitsch case MAT_SORTED_FULL: 925d71ae5a4SJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 926d71ae5a4SJacob Faibussowitsch break; 927d71ae5a4SJacob Faibussowitsch case MAT_IGNORE_OFF_PROC_ENTRIES: 928d71ae5a4SJacob Faibussowitsch a->donotstash = flg; 929d71ae5a4SJacob Faibussowitsch break; 93077e54ba9SKris Buschelman case MAT_SYMMETRIC: 93177e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 9329a4540c5SBarry Smith case MAT_HERMITIAN: 9339a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 934b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 935b94d7dedSBarry Smith case MAT_SPD: 936600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 9375d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 938b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 939b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above */ 9409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 94177e54ba9SKris Buschelman break; 942d71ae5a4SJacob Faibussowitsch default: 943d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 9443a40ed3dSBarry Smith } 9453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9468965ea79SLois Curfman McInnes } 9478965ea79SLois Curfman McInnes 94866976f2fSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 949d71ae5a4SJacob Faibussowitsch { 9505b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 951637a0070SStefano Zampini const PetscScalar *l; 952637a0070SStefano Zampini PetscScalar x, *v, *vv, *r; 953637a0070SStefano Zampini PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 9545b2fa520SLois Curfman McInnes 9555b2fa520SLois Curfman McInnes PetscFunctionBegin; 9569566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(mdn->A, &vv)); 9579566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(mdn->A, &lda)); 9589566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, &s2, &s3)); 9595b2fa520SLois Curfman McInnes if (ll) { 9609566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll, &s2a)); 96108401ef6SPierre Jolivet PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 9629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ll, &l)); 9635b2fa520SLois Curfman McInnes for (i = 0; i < m; i++) { 9645b2fa520SLois Curfman McInnes x = l[i]; 965637a0070SStefano Zampini v = vv + i; 9669371c9d4SSatish Balay for (j = 0; j < n; j++) { 9679371c9d4SSatish Balay (*v) *= x; 9689371c9d4SSatish Balay v += lda; 9699371c9d4SSatish Balay } 9705b2fa520SLois Curfman McInnes } 9719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ll, &l)); 9729566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 9735b2fa520SLois Curfman McInnes } 9745b2fa520SLois Curfman McInnes if (rr) { 975637a0070SStefano Zampini const PetscScalar *ar; 976637a0070SStefano Zampini 9779566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &s3a)); 97808401ef6SPierre Jolivet PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 9799566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rr, &ar)); 9806f08ad05SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 9819566063dSJacob Faibussowitsch PetscCall(VecGetArray(mdn->lvec, &r)); 9829566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 9839566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 9849566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rr, &ar)); 9855b2fa520SLois Curfman McInnes for (i = 0; i < n; i++) { 9865b2fa520SLois Curfman McInnes x = r[i]; 987637a0070SStefano Zampini v = vv + i * lda; 9882205254eSKarl Rupp for (j = 0; j < m; j++) (*v++) *= x; 9895b2fa520SLois Curfman McInnes } 9909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mdn->lvec, &r)); 9919566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * n * m)); 9925b2fa520SLois Curfman McInnes } 9939566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 9943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9955b2fa520SLois Curfman McInnes } 9965b2fa520SLois Curfman McInnes 99766976f2fSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 998d71ae5a4SJacob Faibussowitsch { 9993501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 100013f74950SBarry Smith PetscInt i, j; 1001616b8fbbSStefano Zampini PetscMPIInt size; 1002329f5518SBarry Smith PetscReal sum = 0.0; 1003637a0070SStefano Zampini const PetscScalar *av, *v; 10043501a2bdSLois Curfman McInnes 10053a40ed3dSBarry Smith PetscFunctionBegin; 10069566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1007637a0070SStefano Zampini v = av; 10089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1009616b8fbbSStefano Zampini if (size == 1) { 10109566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, nrm)); 10113501a2bdSLois Curfman McInnes } else { 10123501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 1013d0f46423SBarry Smith for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 10149371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 10159371c9d4SSatish Balay v++; 10163501a2bdSLois Curfman McInnes } 10171c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 10188f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 10199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 10203a40ed3dSBarry Smith } else if (type == NORM_1) { 1021329f5518SBarry Smith PetscReal *tmp, *tmp2; 10229566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 1023064f8208SBarry Smith *nrm = 0.0; 1024637a0070SStefano Zampini v = av; 1025d0f46423SBarry Smith for (j = 0; j < mdn->A->cmap->n; j++) { 1026d0f46423SBarry Smith for (i = 0; i < mdn->A->rmap->n; i++) { 10279371c9d4SSatish Balay tmp[j] += PetscAbsScalar(*v); 10289371c9d4SSatish Balay v++; 10293501a2bdSLois Curfman McInnes } 10303501a2bdSLois Curfman McInnes } 10311c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1032d0f46423SBarry Smith for (j = 0; j < A->cmap->N; j++) { 1033064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 10343501a2bdSLois Curfman McInnes } 10359566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmp, tmp2)); 10369566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 10373a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 1038329f5518SBarry Smith PetscReal ntemp; 10399566063dSJacob Faibussowitsch PetscCall(MatNorm(mdn->A, type, &ntemp)); 10401c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1041ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 10423501a2bdSLois Curfman McInnes } 10439566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 10443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10453501a2bdSLois Curfman McInnes } 10463501a2bdSLois Curfman McInnes 104766976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1048d71ae5a4SJacob Faibussowitsch { 10493501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense *)A->data; 10503501a2bdSLois Curfman McInnes Mat B; 1051d0f46423SBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1052637a0070SStefano Zampini PetscInt j, i, lda; 105387828ca2SBarry Smith PetscScalar *v; 10543501a2bdSLois Curfman McInnes 10553a40ed3dSBarry Smith PetscFunctionBegin; 10567fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1057cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 10589566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 10599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 10609566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 10619566063dSJacob Faibussowitsch PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1062637a0070SStefano Zampini } else B = *matout; 10633501a2bdSLois Curfman McInnes 10649371c9d4SSatish Balay m = a->A->rmap->n; 10659371c9d4SSatish Balay n = a->A->cmap->n; 10669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 10679566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 10689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rwork)); 10693501a2bdSLois Curfman McInnes for (i = 0; i < m; i++) rwork[i] = rstart + i; 10701acff37aSSatish Balay for (j = 0; j < n; j++) { 10719566063dSJacob Faibussowitsch PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 10728e3a54c0SPierre Jolivet v = PetscSafePointerPlusOffset(v, lda); 10733501a2bdSLois Curfman McInnes } 10749566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(rwork)); 10769566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 10779566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1078cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 10793501a2bdSLois Curfman McInnes *matout = B; 10803501a2bdSLois Curfman McInnes } else { 10819566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &B)); 10823501a2bdSLois Curfman McInnes } 10833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1084096963f5SLois Curfman McInnes } 1085096963f5SLois Curfman McInnes 10866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 108752c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 10888965ea79SLois Curfman McInnes 108966976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_MPIDense(Mat A) 1090d71ae5a4SJacob Faibussowitsch { 1091273d9f13SBarry Smith PetscFunctionBegin; 10929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->rmap)); 10939566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(A->cmap)); 109448a46eb9SPierre Jolivet if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 10953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1096273d9f13SBarry Smith } 1097273d9f13SBarry Smith 109866976f2fSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1099d71ae5a4SJacob Faibussowitsch { 1100488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1101488007eeSBarry Smith 1102488007eeSBarry Smith PetscFunctionBegin; 11039566063dSJacob Faibussowitsch PetscCall(MatAXPY(A->A, alpha, B->A, str)); 11043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1105488007eeSBarry Smith } 1106488007eeSBarry Smith 110766976f2fSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1108d71ae5a4SJacob Faibussowitsch { 1109ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1110ba337c44SJed Brown 1111ba337c44SJed Brown PetscFunctionBegin; 11129566063dSJacob Faibussowitsch PetscCall(MatConjugate(a->A)); 11133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1114ba337c44SJed Brown } 1115ba337c44SJed Brown 111666976f2fSJacob Faibussowitsch static PetscErrorCode MatRealPart_MPIDense(Mat A) 1117d71ae5a4SJacob Faibussowitsch { 1118ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1119ba337c44SJed Brown 1120ba337c44SJed Brown PetscFunctionBegin; 11219566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 11223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1123ba337c44SJed Brown } 1124ba337c44SJed Brown 112566976f2fSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1126d71ae5a4SJacob Faibussowitsch { 1127ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1128ba337c44SJed Brown 1129ba337c44SJed Brown PetscFunctionBegin; 11309566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 11313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1132ba337c44SJed Brown } 1133ba337c44SJed Brown 1134d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1135d71ae5a4SJacob Faibussowitsch { 1136637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 113749a6ff4bSBarry Smith 113849a6ff4bSBarry Smith PetscFunctionBegin; 113928b400f6SJacob Faibussowitsch PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 114028b400f6SJacob Faibussowitsch PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 11419566063dSJacob Faibussowitsch PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 11423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 114349a6ff4bSBarry Smith } 114449a6ff4bSBarry Smith 1145857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 114652c5f739Sprj- 114766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1148d71ae5a4SJacob Faibussowitsch { 1149a873a8cdSSam Reynolds PetscInt i, m, n; 11500716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense *)A->data; 11510716a85fSBarry Smith PetscReal *work; 11520716a85fSBarry Smith 11530716a85fSBarry Smith PetscFunctionBegin; 11549566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n)); 11559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &work)); 1156857cbf51SRichard Tran Mills if (type == REDUCTION_MEAN_REALPART) { 11579566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1158857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 11599566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1160a873a8cdSSam Reynolds } else { 11619566063dSJacob Faibussowitsch PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1162a873a8cdSSam Reynolds } 1163857cbf51SRichard Tran Mills if (type == NORM_2) { 11640716a85fSBarry Smith for (i = 0; i < n; i++) work[i] *= work[i]; 11650716a85fSBarry Smith } 1166857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 11671c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 11680716a85fSBarry Smith } else { 11691c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 11700716a85fSBarry Smith } 11719566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 1172857cbf51SRichard Tran Mills if (type == NORM_2) { 1173a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1174857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1175a873a8cdSSam Reynolds for (i = 0; i < n; i++) reductions[i] /= m; 11760716a85fSBarry Smith } 11773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11780716a85fSBarry Smith } 11790716a85fSBarry Smith 1180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1181d71ae5a4SJacob Faibussowitsch { 118273a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense *)x->data; 118373a71a0fSBarry Smith 118473a71a0fSBarry Smith PetscFunctionBegin; 11859566063dSJacob Faibussowitsch PetscCall(MatSetRandom(d->A, rctx)); 11863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 11873faff063SStefano Zampini x->offloadmask = d->A->offloadmask; 11883faff063SStefano Zampini #endif 11893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 119073a71a0fSBarry Smith } 119173a71a0fSBarry Smith 1192d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1193d71ae5a4SJacob Faibussowitsch { 11943b49f96aSBarry Smith PetscFunctionBegin; 11953b49f96aSBarry Smith *missing = PETSC_FALSE; 11963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11973b49f96aSBarry Smith } 11983b49f96aSBarry Smith 11994222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1200cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12016718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1202a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 12036718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 12046718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 12055d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1206cc48ffa7SToby Isaac 120709dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 120809dc0095SBarry Smith MatGetRow_MPIDense, 120909dc0095SBarry Smith MatRestoreRow_MPIDense, 121009dc0095SBarry Smith MatMult_MPIDense, 121197304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 12127c922b88SBarry Smith MatMultTranspose_MPIDense, 12137c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 1214f4259b30SLisandro Dalcin NULL, 1215f4259b30SLisandro Dalcin NULL, 1216f4259b30SLisandro Dalcin NULL, 1217f4259b30SLisandro Dalcin /* 10*/ NULL, 1218f4259b30SLisandro Dalcin NULL, 1219f4259b30SLisandro Dalcin NULL, 1220f4259b30SLisandro Dalcin NULL, 122109dc0095SBarry Smith MatTranspose_MPIDense, 122297304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 12236e4ee0c6SHong Zhang MatEqual_MPIDense, 122409dc0095SBarry Smith MatGetDiagonal_MPIDense, 12255b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 122609dc0095SBarry Smith MatNorm_MPIDense, 122797304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 122809dc0095SBarry Smith MatAssemblyEnd_MPIDense, 122909dc0095SBarry Smith MatSetOption_MPIDense, 123009dc0095SBarry Smith MatZeroEntries_MPIDense, 1231d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1232f4259b30SLisandro Dalcin NULL, 1233f4259b30SLisandro Dalcin NULL, 1234f4259b30SLisandro Dalcin NULL, 1235f4259b30SLisandro Dalcin NULL, 12364994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1237f4259b30SLisandro Dalcin NULL, 1238f4259b30SLisandro Dalcin NULL, 1239c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 1240f4259b30SLisandro Dalcin NULL, 1241d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 1242f4259b30SLisandro Dalcin NULL, 1243f4259b30SLisandro Dalcin NULL, 1244f4259b30SLisandro Dalcin NULL, 1245f4259b30SLisandro Dalcin NULL, 1246d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 12477dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 1248f4259b30SLisandro Dalcin NULL, 124909dc0095SBarry Smith MatGetValues_MPIDense, 1250a76f77c3SStefano Zampini MatCopy_MPIDense, 1251f4259b30SLisandro Dalcin /* 44*/ NULL, 125209dc0095SBarry Smith MatScale_MPIDense, 12532f605a99SJose E. Roman MatShift_MPIDense, 1254f4259b30SLisandro Dalcin NULL, 1255f4259b30SLisandro Dalcin NULL, 125673a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 1257f4259b30SLisandro Dalcin NULL, 1258f4259b30SLisandro Dalcin NULL, 1259f4259b30SLisandro Dalcin NULL, 1260f4259b30SLisandro Dalcin NULL, 1261f4259b30SLisandro Dalcin /* 54*/ NULL, 1262f4259b30SLisandro Dalcin NULL, 1263f4259b30SLisandro Dalcin NULL, 1264f4259b30SLisandro Dalcin NULL, 1265f4259b30SLisandro Dalcin NULL, 12667dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1267b9b97703SBarry Smith MatDestroy_MPIDense, 1268b9b97703SBarry Smith MatView_MPIDense, 1269f4259b30SLisandro Dalcin NULL, 1270f4259b30SLisandro Dalcin NULL, 1271f4259b30SLisandro Dalcin /* 64*/ NULL, 1272f4259b30SLisandro Dalcin NULL, 1273f4259b30SLisandro Dalcin NULL, 1274f4259b30SLisandro Dalcin NULL, 1275f4259b30SLisandro Dalcin NULL, 1276f4259b30SLisandro Dalcin /* 69*/ NULL, 1277f4259b30SLisandro Dalcin NULL, 1278f4259b30SLisandro Dalcin NULL, 1279f4259b30SLisandro Dalcin NULL, 1280f4259b30SLisandro Dalcin NULL, 1281f4259b30SLisandro Dalcin /* 74*/ NULL, 1282f4259b30SLisandro Dalcin NULL, 1283f4259b30SLisandro Dalcin NULL, 1284f4259b30SLisandro Dalcin NULL, 1285f4259b30SLisandro Dalcin NULL, 1286f4259b30SLisandro Dalcin /* 79*/ NULL, 1287f4259b30SLisandro Dalcin NULL, 1288f4259b30SLisandro Dalcin NULL, 1289f4259b30SLisandro Dalcin NULL, 12905bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1291f4259b30SLisandro Dalcin NULL, 1292f4259b30SLisandro Dalcin NULL, 1293f4259b30SLisandro Dalcin NULL, 1294f4259b30SLisandro Dalcin NULL, 1295f4259b30SLisandro Dalcin NULL, 1296f4259b30SLisandro Dalcin /* 89*/ NULL, 1297f4259b30SLisandro Dalcin NULL, 1298f4259b30SLisandro Dalcin NULL, 1299f4259b30SLisandro Dalcin NULL, 1300f4259b30SLisandro Dalcin NULL, 1301f4259b30SLisandro Dalcin /* 94*/ NULL, 1302f4259b30SLisandro Dalcin NULL, 13036718818eSStefano Zampini MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1304cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1305f4259b30SLisandro Dalcin NULL, 13064222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1307f4259b30SLisandro Dalcin NULL, 1308f4259b30SLisandro Dalcin NULL, 1309ba337c44SJed Brown MatConjugate_MPIDense, 1310f4259b30SLisandro Dalcin NULL, 1311f4259b30SLisandro Dalcin /*104*/ NULL, 1312ba337c44SJed Brown MatRealPart_MPIDense, 1313ba337c44SJed Brown MatImaginaryPart_MPIDense, 1314f4259b30SLisandro Dalcin NULL, 1315f4259b30SLisandro Dalcin NULL, 1316f4259b30SLisandro Dalcin /*109*/ NULL, 1317f4259b30SLisandro Dalcin NULL, 1318f4259b30SLisandro Dalcin NULL, 131949a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 13203b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 1321f4259b30SLisandro Dalcin /*114*/ NULL, 1322f4259b30SLisandro Dalcin NULL, 1323f4259b30SLisandro Dalcin NULL, 1324f4259b30SLisandro Dalcin NULL, 1325f4259b30SLisandro Dalcin NULL, 1326f4259b30SLisandro Dalcin /*119*/ NULL, 1327f4259b30SLisandro Dalcin NULL, 1328459e8d23SBlanca Mellado Pinto MatMultHermitianTranspose_MPIDense, 1329459e8d23SBlanca Mellado Pinto MatMultHermitianTransposeAdd_MPIDense, 1330f4259b30SLisandro Dalcin NULL, 1331f4259b30SLisandro Dalcin /*124*/ NULL, 1332a873a8cdSSam Reynolds MatGetColumnReductions_MPIDense, 1333f4259b30SLisandro Dalcin NULL, 1334f4259b30SLisandro Dalcin NULL, 1335f4259b30SLisandro Dalcin NULL, 1336f4259b30SLisandro Dalcin /*129*/ NULL, 1337f4259b30SLisandro Dalcin NULL, 13386718818eSStefano Zampini MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1339cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 1340f4259b30SLisandro Dalcin NULL, 1341f4259b30SLisandro Dalcin /*134*/ NULL, 1342f4259b30SLisandro Dalcin NULL, 1343f4259b30SLisandro Dalcin NULL, 1344f4259b30SLisandro Dalcin NULL, 1345f4259b30SLisandro Dalcin NULL, 1346f4259b30SLisandro Dalcin /*139*/ NULL, 1347f4259b30SLisandro Dalcin NULL, 1348f4259b30SLisandro Dalcin NULL, 1349f4259b30SLisandro Dalcin NULL, 1350f4259b30SLisandro Dalcin NULL, 13514222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 1352f4259b30SLisandro Dalcin /*145*/ NULL, 1353f4259b30SLisandro Dalcin NULL, 135499a7f59eSMark Adams NULL, 135599a7f59eSMark Adams NULL, 13567fb60732SBarry Smith NULL, 1357dec0b466SHong Zhang /*150*/ NULL, 1358eede4a3fSMark Adams NULL, 1359dec0b466SHong Zhang NULL}; 13608965ea79SLois Curfman McInnes 136166976f2fSJacob Faibussowitsch static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1362d71ae5a4SJacob Faibussowitsch { 1363637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 136447d993e7Ssuyashtn MatType mtype = MATSEQDENSE; 1365a23d5eceSKris Buschelman 1366a23d5eceSKris Buschelman PetscFunctionBegin; 136728b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 13689566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 13699566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 1370637a0070SStefano Zampini if (!a->A) { 13719566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 13729566063dSJacob Faibussowitsch PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1373637a0070SStefano Zampini } 137447d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) 137547d993e7Ssuyashtn PetscBool iscuda; 13769566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 137747d993e7Ssuyashtn if (iscuda) mtype = MATSEQDENSECUDA; 137847d993e7Ssuyashtn #endif 137947d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 138047d993e7Ssuyashtn PetscBool iship; 138147d993e7Ssuyashtn PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 138247d993e7Ssuyashtn if (iship) mtype = MATSEQDENSEHIP; 138347d993e7Ssuyashtn #endif 138447d993e7Ssuyashtn PetscCall(MatSetType(a->A, mtype)); 13859566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 138647d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 13873faff063SStefano Zampini mat->offloadmask = a->A->offloadmask; 13883faff063SStefano Zampini #endif 1389637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 13906f08ad05SPierre Jolivet mat->assembled = PETSC_TRUE; 13913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1392a23d5eceSKris Buschelman } 1393a23d5eceSKris Buschelman 1394d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1395d71ae5a4SJacob Faibussowitsch { 13964e29119aSPierre Jolivet Mat B, C; 13974e29119aSPierre Jolivet 13984e29119aSPierre Jolivet PetscFunctionBegin; 13999566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 14009566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 14019566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 14024e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14034e29119aSPierre Jolivet C = *newmat; 14044e29119aSPierre Jolivet } else C = NULL; 14059566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14069566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14074e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14089566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14094e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14114e29119aSPierre Jolivet } 14124e29119aSPierre Jolivet 141366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1414d71ae5a4SJacob Faibussowitsch { 14154e29119aSPierre Jolivet Mat B, C; 14164e29119aSPierre Jolivet 14174e29119aSPierre Jolivet PetscFunctionBegin; 14189566063dSJacob Faibussowitsch PetscCall(MatDenseGetLocalMatrix(A, &C)); 14199566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 14204e29119aSPierre Jolivet if (reuse == MAT_REUSE_MATRIX) { 14214e29119aSPierre Jolivet C = *newmat; 14224e29119aSPierre Jolivet } else C = NULL; 14239566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 14249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 14254e29119aSPierre Jolivet if (reuse == MAT_INPLACE_MATRIX) { 14269566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &C)); 14274e29119aSPierre Jolivet } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 14283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14294e29119aSPierre Jolivet } 14304e29119aSPierre Jolivet 143165b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1432d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1433d71ae5a4SJacob Faibussowitsch { 1434f709e8d7SPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 14358ea901baSHong Zhang Mat mat_elemental; 143632d7a744SHong Zhang PetscScalar *v; 1437f709e8d7SPierre Jolivet PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 14388ea901baSHong Zhang 14398baccfbdSHong Zhang PetscFunctionBegin; 1440378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1441378336b6SHong Zhang mat_elemental = *newmat; 14429566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(*newmat)); 1443378336b6SHong Zhang } else { 14449566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 14459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 14469566063dSJacob Faibussowitsch PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 14479566063dSJacob Faibussowitsch PetscCall(MatSetUp(mat_elemental)); 14489566063dSJacob Faibussowitsch PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1449378336b6SHong Zhang } 1450378336b6SHong Zhang 14519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &rows, N, &cols)); 145232d7a744SHong Zhang for (i = 0; i < N; i++) cols[i] = i; 145332d7a744SHong Zhang for (i = 0; i < m; i++) rows[i] = rstart + i; 14548ea901baSHong Zhang 1455637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 14569566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(A, &v)); 1457f709e8d7SPierre Jolivet PetscCall(MatDenseGetLDA(a->A, &lda)); 1458f709e8d7SPierre Jolivet if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1459f709e8d7SPierre Jolivet else { 1460f709e8d7SPierre Jolivet for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1461f709e8d7SPierre Jolivet } 14629566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 14639566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 14649566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(A, &v)); 14659566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, cols)); 14668ea901baSHong Zhang 1467511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 14689566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &mat_elemental)); 14698ea901baSHong Zhang } else { 14708ea901baSHong Zhang *newmat = mat_elemental; 14718ea901baSHong Zhang } 14723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 14738baccfbdSHong Zhang } 147465b80a83SHong Zhang #endif 14758baccfbdSHong Zhang 1476d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1477d71ae5a4SJacob Faibussowitsch { 147886aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 147986aefd0dSHong Zhang 148086aefd0dSHong Zhang PetscFunctionBegin; 14819566063dSJacob Faibussowitsch PetscCall(MatDenseGetColumn(mat->A, col, vals)); 14823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 148386aefd0dSHong Zhang } 148486aefd0dSHong Zhang 1485d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1486d71ae5a4SJacob Faibussowitsch { 148786aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 148886aefd0dSHong Zhang 148986aefd0dSHong Zhang PetscFunctionBegin; 14909566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreColumn(mat->A, vals)); 14913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 149286aefd0dSHong Zhang } 149386aefd0dSHong Zhang 1494d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1495d71ae5a4SJacob Faibussowitsch { 149694e2cb23SJakub Kruzik Mat_MPIDense *mat; 149794e2cb23SJakub Kruzik PetscInt m, nloc, N; 149894e2cb23SJakub Kruzik 149994e2cb23SJakub Kruzik PetscFunctionBegin; 15009566063dSJacob Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 15019566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 150294e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 150394e2cb23SJakub Kruzik PetscInt sum; 150494e2cb23SJakub Kruzik 150548a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 150694e2cb23SJakub Kruzik /* Check sum(n) = N */ 15071c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 150808401ef6SPierre Jolivet PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 150994e2cb23SJakub Kruzik 15109566063dSJacob Faibussowitsch PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 15119566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 151294e2cb23SJakub Kruzik } 151394e2cb23SJakub Kruzik 151494e2cb23SJakub Kruzik /* numeric phase */ 151594e2cb23SJakub Kruzik mat = (Mat_MPIDense *)(*outmat)->data; 15169566063dSJacob Faibussowitsch PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 15173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 151894e2cb23SJakub Kruzik } 151994e2cb23SJakub Kruzik 1520d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1521d71ae5a4SJacob Faibussowitsch { 15226947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15236947451fSStefano Zampini PetscInt lda; 15246947451fSStefano Zampini 15256947451fSStefano Zampini PetscFunctionBegin; 152628b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 152728b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1528d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15296947451fSStefano Zampini a->vecinuse = col + 1; 15309566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15319566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 15328e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15336947451fSStefano Zampini *v = a->cvec; 15343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15356947451fSStefano Zampini } 15366947451fSStefano Zampini 1537d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1538d71ae5a4SJacob Faibussowitsch { 15396947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15406947451fSStefano Zampini 15416947451fSStefano Zampini PetscFunctionBegin; 154228b400f6SJacob Faibussowitsch PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 154328b400f6SJacob Faibussowitsch PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 15446947451fSStefano Zampini a->vecinuse = 0; 15459566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 15469566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1547742765d3SMatthew Knepley if (v) *v = NULL; 15483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15496947451fSStefano Zampini } 15506947451fSStefano Zampini 1551d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1552d71ae5a4SJacob Faibussowitsch { 15536947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15546947451fSStefano Zampini PetscInt lda; 15556947451fSStefano Zampini 15566947451fSStefano Zampini PetscFunctionBegin; 155728b400f6SJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 155828b400f6SJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1559d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15606947451fSStefano Zampini a->vecinuse = col + 1; 15619566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15629566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 15638e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15649566063dSJacob Faibussowitsch PetscCall(VecLockReadPush(a->cvec)); 15656947451fSStefano Zampini *v = a->cvec; 15663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15676947451fSStefano Zampini } 15686947451fSStefano Zampini 1569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1570d71ae5a4SJacob Faibussowitsch { 15716947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15726947451fSStefano Zampini 15736947451fSStefano Zampini PetscFunctionBegin; 15745f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 15755f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 15766947451fSStefano Zampini a->vecinuse = 0; 15779566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 15789566063dSJacob Faibussowitsch PetscCall(VecLockReadPop(a->cvec)); 15799566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1580742765d3SMatthew Knepley if (v) *v = NULL; 15813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15826947451fSStefano Zampini } 15836947451fSStefano Zampini 1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1585d71ae5a4SJacob Faibussowitsch { 15866947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 15876947451fSStefano Zampini PetscInt lda; 15886947451fSStefano Zampini 15896947451fSStefano Zampini PetscFunctionBegin; 15905f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 15915f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1592d16ceb75SStefano Zampini if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 15936947451fSStefano Zampini a->vecinuse = col + 1; 15949566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &lda)); 15959566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 15968e3a54c0SPierre Jolivet PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 15976947451fSStefano Zampini *v = a->cvec; 15983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 15996947451fSStefano Zampini } 16006947451fSStefano Zampini 1601d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1602d71ae5a4SJacob Faibussowitsch { 16036947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense *)A->data; 16046947451fSStefano Zampini 16056947451fSStefano Zampini PetscFunctionBegin; 16065f80ce2aSJacob Faibussowitsch PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 16075f80ce2aSJacob Faibussowitsch PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 16086947451fSStefano Zampini a->vecinuse = 0; 16099566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 16109566063dSJacob Faibussowitsch PetscCall(VecResetArray(a->cvec)); 1611742765d3SMatthew Knepley if (v) *v = NULL; 16123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16136947451fSStefano Zampini } 16146947451fSStefano Zampini 161566976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1616d71ae5a4SJacob Faibussowitsch { 16175ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1618616b8fbbSStefano Zampini Mat_MPIDense *c; 1619616b8fbbSStefano Zampini MPI_Comm comm; 1620a2748737SPierre Jolivet PetscInt pbegin, pend; 16215ea7661aSPierre Jolivet 16225ea7661aSPierre Jolivet PetscFunctionBegin; 16239566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 16245f80ce2aSJacob Faibussowitsch PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 16255f80ce2aSJacob Faibussowitsch PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1626a2748737SPierre Jolivet pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1627a2748737SPierre Jolivet pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 16285ea7661aSPierre Jolivet if (!a->cmat) { 16299566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &a->cmat)); 16309566063dSJacob Faibussowitsch PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1631a2748737SPierre Jolivet if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1632a2748737SPierre Jolivet else { 1633a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1634a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1635a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1636a2748737SPierre Jolivet } 16379566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 16389566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1639a2748737SPierre Jolivet } else { 1640a2748737SPierre Jolivet PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1641a2748737SPierre Jolivet if (same && a->cmat->rmap->N != A->rmap->N) { 1642a2748737SPierre Jolivet same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1643a2748737SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1644a2748737SPierre Jolivet } 1645a2748737SPierre Jolivet if (!same) { 1646a2748737SPierre Jolivet PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1647a2748737SPierre Jolivet PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1648a2748737SPierre Jolivet PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1649a2748737SPierre Jolivet PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1650a2748737SPierre Jolivet PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1651a2748737SPierre Jolivet } 1652a2748737SPierre Jolivet if (cend - cbegin != a->cmat->cmap->N) { 16539566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 16549566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 16559566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 16569566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 16575ea7661aSPierre Jolivet } 1658a2748737SPierre Jolivet } 1659616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 16605f80ce2aSJacob Faibussowitsch PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1661a2748737SPierre Jolivet PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 16623faff063SStefano Zampini 1663616b8fbbSStefano Zampini a->cmat->preallocated = PETSC_TRUE; 1664616b8fbbSStefano Zampini a->cmat->assembled = PETSC_TRUE; 16653faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 16663faff063SStefano Zampini a->cmat->offloadmask = c->A->offloadmask; 16673faff063SStefano Zampini #endif 16685ea7661aSPierre Jolivet a->matinuse = cbegin + 1; 16695ea7661aSPierre Jolivet *v = a->cmat; 16703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16715ea7661aSPierre Jolivet } 16725ea7661aSPierre Jolivet 167366976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1674d71ae5a4SJacob Faibussowitsch { 16755ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1676616b8fbbSStefano Zampini Mat_MPIDense *c; 16775ea7661aSPierre Jolivet 16785ea7661aSPierre Jolivet PetscFunctionBegin; 16795f80ce2aSJacob Faibussowitsch PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 16805f80ce2aSJacob Faibussowitsch PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 16815f80ce2aSJacob Faibussowitsch PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 16825ea7661aSPierre Jolivet a->matinuse = 0; 1683616b8fbbSStefano Zampini c = (Mat_MPIDense *)a->cmat->data; 16849566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1685742765d3SMatthew Knepley if (v) *v = NULL; 16863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 16873faff063SStefano Zampini A->offloadmask = a->A->offloadmask; 16883faff063SStefano Zampini #endif 16893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16905ea7661aSPierre Jolivet } 16915ea7661aSPierre Jolivet 1692002bc6daSRichard Tran Mills /*MC 1693002bc6daSRichard Tran Mills MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1694002bc6daSRichard Tran Mills 16952ef1f0ffSBarry Smith Options Database Key: 169611a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1697002bc6daSRichard Tran Mills 1698002bc6daSRichard Tran Mills Level: beginner 1699002bc6daSRichard Tran Mills 17001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1701002bc6daSRichard Tran Mills M*/ 17024742e46bSJacob Faibussowitsch PetscErrorCode MatCreate_MPIDense(Mat mat) 1703d71ae5a4SJacob Faibussowitsch { 1704273d9f13SBarry Smith Mat_MPIDense *a; 1705273d9f13SBarry Smith 1706273d9f13SBarry Smith PetscFunctionBegin; 17074dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&a)); 1708b0a32e0cSBarry Smith mat->data = (void *)a; 1709aea10558SJacob Faibussowitsch mat->ops[0] = MatOps_Values; 1710273d9f13SBarry Smith 1711273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1712273d9f13SBarry Smith 1713273d9f13SBarry Smith /* build cache for off array entries formed */ 1714273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 17152205254eSKarl Rupp 17169566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1717273d9f13SBarry Smith 1718273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1719f4259b30SLisandro Dalcin a->lvec = NULL; 1720f4259b30SLisandro Dalcin a->Mvctx = NULL; 1721273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1722273d9f13SBarry Smith 17239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 17249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 17259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 17269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 17279566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 17289566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 17299566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 17309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 17319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 17329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 17339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 17349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 17359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 17369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 17379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 17389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 17399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 17409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 17419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 17429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 17439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 17448baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 17459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 17468baccfbdSHong Zhang #endif 1747d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 17489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1749d24d4204SJose E. Roman #endif 1750637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 17519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1752637a0070SStefano Zampini #endif 17539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 17549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 17559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 17566718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 17579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 17589566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 17596718818eSStefano Zampini #endif 176047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 176147d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 176247d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 176347d993e7Ssuyashtn PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 176447d993e7Ssuyashtn #endif 17659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 17669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1767*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1768*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1769*0be0d8bdSHansol Suh PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 17709566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 17713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1772273d9f13SBarry Smith } 1773273d9f13SBarry Smith 1774209238afSKris Buschelman /*MC 1775002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1776209238afSKris Buschelman 177711a5261eSBarry Smith This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 177811a5261eSBarry Smith and `MATMPIDENSE` otherwise. 1779209238afSKris Buschelman 17802ef1f0ffSBarry Smith Options Database Key: 178111a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1782209238afSKris Buschelman 1783209238afSKris Buschelman Level: beginner 1784209238afSKris Buschelman 17851cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 17866947451fSStefano Zampini M*/ 17876947451fSStefano Zampini 1788273d9f13SBarry Smith /*@C 1789273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1790273d9f13SBarry Smith 17918f490ea3SStefano Zampini Collective 1792273d9f13SBarry Smith 1793273d9f13SBarry Smith Input Parameters: 1794fe59aa6dSJacob Faibussowitsch + B - the matrix 17952ef1f0ffSBarry Smith - data - optional location of matrix data. Set to `NULL` for PETSc 1796273d9f13SBarry Smith to control all matrix memory allocation. 1797273d9f13SBarry Smith 17982ef1f0ffSBarry Smith Level: intermediate 17992ef1f0ffSBarry Smith 1800273d9f13SBarry Smith Notes: 18012ef1f0ffSBarry Smith The dense format is fully compatible with standard Fortran 1802273d9f13SBarry Smith storage by columns. 1803273d9f13SBarry Smith 1804273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1805273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 18062ef1f0ffSBarry Smith set `data` to `NULL`. 1807273d9f13SBarry Smith 18081cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1809273d9f13SBarry Smith @*/ 1810d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1811d71ae5a4SJacob Faibussowitsch { 1812273d9f13SBarry Smith PetscFunctionBegin; 1813d5ea218eSStefano Zampini PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1814cac4c232SBarry Smith PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 18153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1816273d9f13SBarry Smith } 1817273d9f13SBarry Smith 1818d3042a70SBarry Smith /*@ 181911a5261eSBarry Smith MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1820d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1821d3042a70SBarry Smith into a matrix 1822d3042a70SBarry Smith 1823d3042a70SBarry Smith Not Collective 1824d3042a70SBarry Smith 1825d3042a70SBarry Smith Input Parameters: 1826d3042a70SBarry Smith + mat - the matrix 1827d3042a70SBarry Smith - array - the array in column major order 1828d3042a70SBarry Smith 18292ef1f0ffSBarry Smith Level: developer 18302ef1f0ffSBarry Smith 183111a5261eSBarry Smith Note: 183211a5261eSBarry Smith You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1833d3042a70SBarry Smith freed when the matrix is destroyed. 1834d3042a70SBarry Smith 18351cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 183611a5261eSBarry Smith `MatDenseReplaceArray()` 1837d3042a70SBarry Smith @*/ 1838d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1839d71ae5a4SJacob Faibussowitsch { 1840d3042a70SBarry Smith PetscFunctionBegin; 1841d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1842cac4c232SBarry Smith PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 18439566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 184447d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1845637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1846637a0070SStefano Zampini #endif 18473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1848d3042a70SBarry Smith } 1849d3042a70SBarry Smith 1850d3042a70SBarry Smith /*@ 185111a5261eSBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1852d3042a70SBarry Smith 1853d3042a70SBarry Smith Not Collective 1854d3042a70SBarry Smith 18552fe279fdSBarry Smith Input Parameter: 1856d3042a70SBarry Smith . mat - the matrix 1857d3042a70SBarry Smith 18582ef1f0ffSBarry Smith Level: developer 18592ef1f0ffSBarry Smith 186011a5261eSBarry Smith Note: 186111a5261eSBarry Smith You can only call this after a call to `MatDensePlaceArray()` 1862d3042a70SBarry Smith 18631cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1864d3042a70SBarry Smith @*/ 1865d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseResetArray(Mat mat) 1866d71ae5a4SJacob Faibussowitsch { 1867d3042a70SBarry Smith PetscFunctionBegin; 1868d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1869cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 18709566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 18713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1872d3042a70SBarry Smith } 1873d3042a70SBarry Smith 1874d5ea218eSStefano Zampini /*@ 1875d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1876d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 1877d5ea218eSStefano Zampini into a matrix 1878d5ea218eSStefano Zampini 1879d5ea218eSStefano Zampini Not Collective 1880d5ea218eSStefano Zampini 1881d5ea218eSStefano Zampini Input Parameters: 1882d5ea218eSStefano Zampini + mat - the matrix 1883d5ea218eSStefano Zampini - array - the array in column major order 1884d5ea218eSStefano Zampini 18852ef1f0ffSBarry Smith Level: developer 18862ef1f0ffSBarry Smith 188711a5261eSBarry Smith Note: 188811a5261eSBarry Smith The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1889d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 1890d5ea218eSStefano Zampini 18911cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1892d5ea218eSStefano Zampini @*/ 1893d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1894d71ae5a4SJacob Faibussowitsch { 1895d5ea218eSStefano Zampini PetscFunctionBegin; 1896d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1897cac4c232SBarry Smith PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 18989566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 189947d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1900d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1901d5ea218eSStefano Zampini #endif 19023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1903d5ea218eSStefano Zampini } 1904d5ea218eSStefano Zampini 1905637a0070SStefano Zampini /*@C 190611a5261eSBarry Smith MatCreateDense - Creates a matrix in `MATDENSE` format. 19078965ea79SLois Curfman McInnes 1908d083f849SBarry Smith Collective 1909db81eaa0SLois Curfman McInnes 19108965ea79SLois Curfman McInnes Input Parameters: 1911db81eaa0SLois Curfman McInnes + comm - MPI communicator 19122ef1f0ffSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 19132ef1f0ffSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 19142ef1f0ffSBarry Smith . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 19152ef1f0ffSBarry Smith . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 19162ef1f0ffSBarry Smith - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1917dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 19188965ea79SLois Curfman McInnes 19198965ea79SLois Curfman McInnes Output Parameter: 1920477f1c0bSLois Curfman McInnes . A - the matrix 19218965ea79SLois Curfman McInnes 19222ef1f0ffSBarry Smith Level: intermediate 19232ef1f0ffSBarry Smith 1924b259b22eSLois Curfman McInnes Notes: 19252ef1f0ffSBarry Smith The dense format is fully compatible with standard Fortran 192639ddd567SLois Curfman McInnes storage by columns. 192711a5261eSBarry Smith 192811a5261eSBarry Smith Although local portions of the matrix are stored in column-major 1929002bc6daSRichard Tran Mills order, the matrix is partitioned across MPI ranks by row. 19308965ea79SLois Curfman McInnes 193118f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 193218f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 19332ef1f0ffSBarry Smith set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 193418f449edSLois Curfman McInnes 19358965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 19368965ea79SLois Curfman McInnes (possibly both). 19378965ea79SLois Curfman McInnes 19381cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 19398965ea79SLois Curfman McInnes @*/ 1940d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1941d71ae5a4SJacob Faibussowitsch { 19423a40ed3dSBarry Smith PetscFunctionBegin; 19439566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 19449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 19456f08ad05SPierre Jolivet PetscCall(MatSetType(*A, MATDENSE)); 19469566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(*A, data)); 19476f08ad05SPierre Jolivet PetscCall(MatMPIDenseSetPreallocation(*A, data)); 19483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19498965ea79SLois Curfman McInnes } 19508965ea79SLois Curfman McInnes 1951d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1952d71ae5a4SJacob Faibussowitsch { 19538965ea79SLois Curfman McInnes Mat mat; 19543501a2bdSLois Curfman McInnes Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 19558965ea79SLois Curfman McInnes 19563a40ed3dSBarry Smith PetscFunctionBegin; 1957f4259b30SLisandro Dalcin *newmat = NULL; 19589566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 19599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 19609566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1961834f8fabSBarry Smith a = (Mat_MPIDense *)mat->data; 19625aa7edbeSHong Zhang 1963d5f3da31SBarry Smith mat->factortype = A->factortype; 1964c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1965273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 19668965ea79SLois Curfman McInnes 1967e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 19683782ba37SSatish Balay a->donotstash = oldmat->donotstash; 1969e04c1aa4SHong Zhang 19709566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 19719566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 19728965ea79SLois Curfman McInnes 19739566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 197401b82886SBarry Smith 19758965ea79SLois Curfman McInnes *newmat = mat; 19763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 19778965ea79SLois Curfman McInnes } 19788965ea79SLois Curfman McInnes 197966976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1980d71ae5a4SJacob Faibussowitsch { 198187d5ce66SSatish Balay PetscBool isbinary; 198287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 198387d5ce66SSatish Balay PetscBool ishdf5; 198487d5ce66SSatish Balay #endif 1985eb91f321SVaclav Hapla 1986eb91f321SVaclav Hapla PetscFunctionBegin; 1987eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1988eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1989eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 19909566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 19919566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 199287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 19939566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 199487d5ce66SSatish Balay #endif 1995eb91f321SVaclav Hapla if (isbinary) { 19969566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1997eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 199887d5ce66SSatish Balay } else if (ishdf5) { 19999566063dSJacob Faibussowitsch PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2000eb91f321SVaclav Hapla #endif 200198921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 20023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2003eb91f321SVaclav Hapla } 2004eb91f321SVaclav Hapla 2005d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2006d71ae5a4SJacob Faibussowitsch { 20076e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 20086e4ee0c6SHong Zhang Mat a, b; 200990ace30eSBarry Smith 20106e4ee0c6SHong Zhang PetscFunctionBegin; 20116e4ee0c6SHong Zhang a = matA->A; 20126e4ee0c6SHong Zhang b = matB->A; 20132cf15c64SPierre Jolivet PetscCall(MatEqual(a, b, flag)); 20142cf15c64SPierre Jolivet PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 20153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20166e4ee0c6SHong Zhang } 201790ace30eSBarry Smith 201866976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2019d71ae5a4SJacob Faibussowitsch { 20206718818eSStefano Zampini Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2021baa3c1c6SHong Zhang 2022baa3c1c6SHong Zhang PetscFunctionBegin; 20239566063dSJacob Faibussowitsch PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 20249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&atb->atb)); 20259566063dSJacob Faibussowitsch PetscCall(PetscFree(atb)); 20263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2027baa3c1c6SHong Zhang } 2028baa3c1c6SHong Zhang 202966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2030d71ae5a4SJacob Faibussowitsch { 20316718818eSStefano Zampini Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2032cc48ffa7SToby Isaac 2033cc48ffa7SToby Isaac PetscFunctionBegin; 20349566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 20359566063dSJacob Faibussowitsch PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 20369566063dSJacob Faibussowitsch PetscCall(PetscFree(abt)); 20373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2038cc48ffa7SToby Isaac } 2039cc48ffa7SToby Isaac 2040d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2041d71ae5a4SJacob Faibussowitsch { 2042baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 20436718818eSStefano Zampini Mat_TransMatMultDense *atb; 2044cb20be35SHong Zhang MPI_Comm comm; 20456718818eSStefano Zampini PetscMPIInt size, *recvcounts; 20466718818eSStefano Zampini PetscScalar *carray, *sendbuf; 2047637a0070SStefano Zampini const PetscScalar *atbarray; 2048a1f9a5e6SStefano Zampini PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2049e68c0b26SHong Zhang const PetscInt *ranges; 2050cb20be35SHong Zhang 2051cb20be35SHong Zhang PetscFunctionBegin; 20526718818eSStefano Zampini MatCheckProduct(C, 3); 20535f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 20546718818eSStefano Zampini atb = (Mat_TransMatMultDense *)C->product->data; 20556718818eSStefano Zampini recvcounts = atb->recvcounts; 20566718818eSStefano Zampini sendbuf = atb->sendbuf; 20576718818eSStefano Zampini 20589566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 20599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 2060e68c0b26SHong Zhang 2061c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 20629566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2063cb20be35SHong Zhang 20649566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 2065cb20be35SHong Zhang 2066660d5466SHong Zhang /* arrange atbarray into sendbuf */ 20679566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 20689566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2069637a0070SStefano Zampini for (proc = 0, k = 0; proc < size; proc++) { 2070baa3c1c6SHong Zhang for (j = 0; j < cN; j++) { 2071a1f9a5e6SStefano Zampini for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2072cb20be35SHong Zhang } 2073cb20be35SHong Zhang } 20749566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2075637a0070SStefano Zampini 2076c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 20779566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 20789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 20799566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 208067af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 20819566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 20829566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 20833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2084cb20be35SHong Zhang } 2085cb20be35SHong Zhang 2086d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2087d71ae5a4SJacob Faibussowitsch { 2088cb20be35SHong Zhang MPI_Comm comm; 2089baa3c1c6SHong Zhang PetscMPIInt size; 2090660d5466SHong Zhang PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2091baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 209247d993e7Ssuyashtn PetscBool cisdense = PETSC_FALSE; 20930acaeb71SStefano Zampini PetscInt i; 20940acaeb71SStefano Zampini const PetscInt *ranges; 2095cb20be35SHong Zhang 2096cb20be35SHong Zhang PetscFunctionBegin; 20978f83a4d9SJacob Faibussowitsch MatCheckProduct(C, 4); 20985f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 20999566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2100cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 210198921bdaSJacob Faibussowitsch SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2102cb20be35SHong Zhang } 2103cb20be35SHong Zhang 21044222ddf1SHong Zhang /* create matrix product C */ 21059566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 210647d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) 21079566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 210847d993e7Ssuyashtn #endif 210947d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP) 211047d993e7Ssuyashtn PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 211147d993e7Ssuyashtn #endif 211248a46eb9SPierre Jolivet if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 21139566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2114baa3c1c6SHong Zhang 21154222ddf1SHong Zhang /* create data structure for reuse C */ 21169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 21179566063dSJacob Faibussowitsch PetscCall(PetscNew(&atb)); 21184222ddf1SHong Zhang cM = C->rmap->N; 21199566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 21209566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(C, &ranges)); 21210acaeb71SStefano Zampini for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2122baa3c1c6SHong Zhang 21236718818eSStefano Zampini C->product->data = atb; 21246718818eSStefano Zampini C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 21253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2126cb20be35SHong Zhang } 2127cb20be35SHong Zhang 2128d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2129d71ae5a4SJacob Faibussowitsch { 2130cc48ffa7SToby Isaac MPI_Comm comm; 2131cc48ffa7SToby Isaac PetscMPIInt i, size; 2132cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2133cc48ffa7SToby Isaac PetscMPIInt tag; 21344222ddf1SHong Zhang PetscInt alg; 2135cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 21364222ddf1SHong Zhang Mat_Product *product = C->product; 21374222ddf1SHong Zhang PetscBool flg; 2138cc48ffa7SToby Isaac 2139cc48ffa7SToby Isaac PetscFunctionBegin; 21406718818eSStefano Zampini MatCheckProduct(C, 4); 21415f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 21424222ddf1SHong Zhang /* check local size of A and B */ 21435f80ce2aSJacob Faibussowitsch PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2144cc48ffa7SToby Isaac 21459566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2146637a0070SStefano Zampini alg = flg ? 0 : 1; 2147cc48ffa7SToby Isaac 21484222ddf1SHong Zhang /* setup matrix product C */ 21499566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 21509566063dSJacob Faibussowitsch PetscCall(MatSetType(C, MATMPIDENSE)); 21519566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 21529566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 21534222ddf1SHong Zhang 21544222ddf1SHong Zhang /* create data structure for reuse C */ 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 21569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 21579566063dSJacob Faibussowitsch PetscCall(PetscNew(&abt)); 2158cc48ffa7SToby Isaac abt->tag = tag; 2159faa55883SToby Isaac abt->alg = alg; 2160faa55883SToby Isaac switch (alg) { 21614222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2162cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2163cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2164f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2165faa55883SToby Isaac break; 21664222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2167f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2168f4f49eeaSPierre Jolivet PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2169faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2170faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2171faa55883SToby Isaac break; 2172faa55883SToby Isaac } 2173cc48ffa7SToby Isaac 21746718818eSStefano Zampini C->product->data = abt; 21756718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 21764222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 21773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2178cc48ffa7SToby Isaac } 2179cc48ffa7SToby Isaac 2180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2181d71ae5a4SJacob Faibussowitsch { 2182cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 21836718818eSStefano Zampini Mat_MatTransMultDense *abt; 2184cc48ffa7SToby Isaac MPI_Comm comm; 2185cc48ffa7SToby Isaac PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2186f4259b30SLisandro Dalcin PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2187cc48ffa7SToby Isaac PetscInt i, cK = A->cmap->N, k, j, bn; 2188cc48ffa7SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2189637a0070SStefano Zampini const PetscScalar *av, *bv; 219023fff9afSBarry Smith PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2191cc48ffa7SToby Isaac MPI_Request reqs[2]; 2192cc48ffa7SToby Isaac const PetscInt *ranges; 2193cc48ffa7SToby Isaac 2194cc48ffa7SToby Isaac PetscFunctionBegin; 21956718818eSStefano Zampini MatCheckProduct(C, 3); 21965f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 21976718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 21989566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 21999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 22009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 22019566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 22029566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 22039566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 22049566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 22059566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 22069566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &i)); 22079566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &blda)); 22089566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 22099566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 22109566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRanges(B, &ranges)); 2211cc48ffa7SToby Isaac bn = B->rmap->n; 2212637a0070SStefano Zampini if (blda == bn) { 2213637a0070SStefano Zampini sendbuf = (PetscScalar *)bv; 2214cc48ffa7SToby Isaac } else { 2215cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2216cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2217ad540459SPierre Jolivet for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2218cc48ffa7SToby Isaac } 2219cc48ffa7SToby Isaac } 2220cc48ffa7SToby Isaac if (size > 1) { 2221cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2222cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2223cc48ffa7SToby Isaac } else { 2224cc48ffa7SToby Isaac sendto = recvfrom = 0; 2225cc48ffa7SToby Isaac } 22269566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 22279566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2228cc48ffa7SToby Isaac recvisfrom = rank; 2229cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2230cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2231cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2232cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2233cc48ffa7SToby Isaac 2234cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2235cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2236cc48ffa7SToby Isaac sendsiz = cK * bn; 2237cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2238cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 22399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 22409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2241cc48ffa7SToby Isaac } 2242cc48ffa7SToby Isaac 2243cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 22449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2245792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2246cc48ffa7SToby Isaac 2247cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2248cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 22499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2250cc48ffa7SToby Isaac } 2251cc48ffa7SToby Isaac bn = nextbn; 2252cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2253cc48ffa7SToby Isaac sendbuf = recvbuf; 2254cc48ffa7SToby Isaac } 22559566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 22569566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 22579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 225867af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 22599566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 22609566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 22613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2262cc48ffa7SToby Isaac } 2263cc48ffa7SToby Isaac 2264d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2265d71ae5a4SJacob Faibussowitsch { 2266faa55883SToby Isaac Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 22676718818eSStefano Zampini Mat_MatTransMultDense *abt; 2268faa55883SToby Isaac MPI_Comm comm; 2269637a0070SStefano Zampini PetscMPIInt size; 2270637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2271637a0070SStefano Zampini const PetscScalar *av, *bv; 2272637a0070SStefano Zampini PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2273faa55883SToby Isaac PetscScalar _DOne = 1.0, _DZero = 0.0; 2274637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2275faa55883SToby Isaac 2276faa55883SToby Isaac PetscFunctionBegin; 22776718818eSStefano Zampini MatCheckProduct(C, 3); 22785f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 22796718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 22809566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 22819566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 22829566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(a->A, &av)); 22839566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(b->A, &bv)); 22849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 22859566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(a->A, &i)); 22869566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &alda)); 22879566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(b->A, &blda)); 22889566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(c->A, &i)); 22899566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(i, &clda)); 2290faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2291faa55883SToby Isaac bn = B->rmap->n; 2292faa55883SToby Isaac sendbuf = abt->buf[0]; 2293faa55883SToby Isaac recvbuf = abt->buf[1]; 2294faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2295ad540459SPierre Jolivet for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2296faa55883SToby Isaac } 22979566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 22989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 22999566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(cK, &ck)); 23009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 23019566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2302792fecdfSBarry Smith if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 23039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 23049566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 23059566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 230667af85e8SPierre Jolivet PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 23079566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 23089566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 23093ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2310faa55883SToby Isaac } 2311faa55883SToby Isaac 2312d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2313d71ae5a4SJacob Faibussowitsch { 23146718818eSStefano Zampini Mat_MatTransMultDense *abt; 2315faa55883SToby Isaac 2316faa55883SToby Isaac PetscFunctionBegin; 23176718818eSStefano Zampini MatCheckProduct(C, 3); 23185f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 23196718818eSStefano Zampini abt = (Mat_MatTransMultDense *)C->product->data; 2320faa55883SToby Isaac switch (abt->alg) { 2321d71ae5a4SJacob Faibussowitsch case 1: 2322d71ae5a4SJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2323d71ae5a4SJacob Faibussowitsch break; 2324d71ae5a4SJacob Faibussowitsch default: 2325d71ae5a4SJacob Faibussowitsch PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2326d71ae5a4SJacob Faibussowitsch break; 2327faa55883SToby Isaac } 23283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2329faa55883SToby Isaac } 2330faa55883SToby Isaac 233166976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2332d71ae5a4SJacob Faibussowitsch { 23336718818eSStefano Zampini Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2334320f2790SHong Zhang 2335320f2790SHong Zhang PetscFunctionBegin; 23369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ce)); 23379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Ae)); 23389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&ab->Be)); 23399566063dSJacob Faibussowitsch PetscCall(PetscFree(ab)); 23403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2341320f2790SHong Zhang } 2342320f2790SHong Zhang 23435d8c7819SPierre Jolivet static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2344d71ae5a4SJacob Faibussowitsch { 23456718818eSStefano Zampini Mat_MatMultDense *ab; 23465d8c7819SPierre Jolivet Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2347320f2790SHong Zhang 2348320f2790SHong Zhang PetscFunctionBegin; 23496718818eSStefano Zampini MatCheckProduct(C, 3); 23505f80ce2aSJacob Faibussowitsch PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 23516718818eSStefano Zampini ab = (Mat_MatMultDense *)C->product->data; 23525d8c7819SPierre Jolivet if (ab->Ae && ab->Ce) { 23535d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL) 23549566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 23559566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 23569566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 23579566063dSJacob Faibussowitsch PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 23585d8c7819SPierre Jolivet #else 23595d8c7819SPierre Jolivet SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 23605d8c7819SPierre Jolivet #endif 23615d8c7819SPierre Jolivet } else { 23625d8c7819SPierre Jolivet const PetscScalar *read; 23635d8c7819SPierre Jolivet PetscScalar *write; 23645d8c7819SPierre Jolivet PetscInt lda; 23655d8c7819SPierre Jolivet 23665d8c7819SPierre Jolivet PetscCall(MatDenseGetLDA(B, &lda)); 23675d8c7819SPierre Jolivet PetscCall(MatDenseGetArrayRead(B, &read)); 23685d8c7819SPierre Jolivet PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 23695d8c7819SPierre Jolivet if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 23705d8c7819SPierre Jolivet for (PetscInt i = 0; i < C->cmap->N; ++i) { 23715d8c7819SPierre Jolivet PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 23725d8c7819SPierre Jolivet PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 23735d8c7819SPierre Jolivet } 23745d8c7819SPierre Jolivet PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 23755d8c7819SPierre Jolivet PetscCall(MatDenseRestoreArrayRead(B, &read)); 2376f4f49eeaSPierre Jolivet PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 23775d8c7819SPierre Jolivet } 23783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2379320f2790SHong Zhang } 2380320f2790SHong Zhang 2381d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2382d71ae5a4SJacob Faibussowitsch { 23835d8c7819SPierre Jolivet Mat_Product *product = C->product; 23845d8c7819SPierre Jolivet PetscInt alg; 2385320f2790SHong Zhang Mat_MatMultDense *ab; 23865d8c7819SPierre Jolivet PetscBool flg; 2387320f2790SHong Zhang 2388320f2790SHong Zhang PetscFunctionBegin; 23896718818eSStefano Zampini MatCheckProduct(C, 4); 23905f80ce2aSJacob Faibussowitsch PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 23914222ddf1SHong Zhang /* check local size of A and B */ 23925d8c7819SPierre Jolivet PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 23935d8c7819SPierre Jolivet A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2394320f2790SHong Zhang 23955d8c7819SPierre Jolivet PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 23965d8c7819SPierre Jolivet alg = flg ? 0 : 1; 23974222ddf1SHong Zhang 23984222ddf1SHong Zhang /* setup C */ 23995d8c7819SPierre Jolivet PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 24005d8c7819SPierre Jolivet PetscCall(MatSetType(C, MATMPIDENSE)); 24019566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 2402320f2790SHong Zhang 2403320f2790SHong Zhang /* create data structure for reuse Cdense */ 24049566063dSJacob Faibussowitsch PetscCall(PetscNew(&ab)); 24055d8c7819SPierre Jolivet 24065d8c7819SPierre Jolivet switch (alg) { 24075d8c7819SPierre Jolivet case 1: /* alg: "elemental" */ 24085d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL) 24095d8c7819SPierre Jolivet /* create elemental matrices Ae and Be */ 24105d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 24115d8c7819SPierre Jolivet PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 24125d8c7819SPierre Jolivet PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 24135d8c7819SPierre Jolivet PetscCall(MatSetUp(ab->Ae)); 24145d8c7819SPierre Jolivet PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 24155d8c7819SPierre Jolivet 24165d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 24175d8c7819SPierre Jolivet PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 24185d8c7819SPierre Jolivet PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 24195d8c7819SPierre Jolivet PetscCall(MatSetUp(ab->Be)); 24205d8c7819SPierre Jolivet PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 24215d8c7819SPierre Jolivet 24225d8c7819SPierre Jolivet /* compute symbolic Ce = Ae*Be */ 24235d8c7819SPierre Jolivet PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 24245d8c7819SPierre Jolivet PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 24255d8c7819SPierre Jolivet #else 24265d8c7819SPierre Jolivet SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 24275d8c7819SPierre Jolivet #endif 24285d8c7819SPierre Jolivet break; 24295d8c7819SPierre Jolivet default: /* alg: "petsc" */ 24305d8c7819SPierre Jolivet ab->Ae = NULL; 24315d8c7819SPierre Jolivet PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 24325d8c7819SPierre Jolivet ab->Ce = NULL; 24335d8c7819SPierre Jolivet break; 24345d8c7819SPierre Jolivet } 24356718818eSStefano Zampini 24366718818eSStefano Zampini C->product->data = ab; 24376718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 24384222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 24393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2440320f2790SHong Zhang } 24412ef1f0ffSBarry Smith 2442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2443d71ae5a4SJacob Faibussowitsch { 24445d8c7819SPierre Jolivet Mat_Product *product = C->product; 24455d8c7819SPierre Jolivet const char *algTypes[2] = {"petsc", "elemental"}; 24465d8c7819SPierre Jolivet PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 24475d8c7819SPierre Jolivet PetscBool flg = PETSC_FALSE; 24485d8c7819SPierre Jolivet 24494d86920dSPierre Jolivet PetscFunctionBegin; 24505d8c7819SPierre Jolivet /* Set default algorithm */ 24515d8c7819SPierre Jolivet alg = 0; /* default is petsc */ 24525d8c7819SPierre Jolivet PetscCall(PetscStrcmp(product->alg, "default", &flg)); 24535d8c7819SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24545d8c7819SPierre Jolivet 24555d8c7819SPierre Jolivet /* Get runtime option */ 24565d8c7819SPierre Jolivet PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 24575d8c7819SPierre Jolivet PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 24585d8c7819SPierre Jolivet PetscOptionsEnd(); 24595d8c7819SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24605d8c7819SPierre Jolivet 24614222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 24624222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 24633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2464320f2790SHong Zhang } 246586aefd0dSHong Zhang 2466d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2467d71ae5a4SJacob Faibussowitsch { 24684222ddf1SHong Zhang Mat_Product *product = C->product; 24694222ddf1SHong Zhang Mat A = product->A, B = product->B; 24704222ddf1SHong Zhang 24714222ddf1SHong Zhang PetscFunctionBegin; 247207e90273SPierre Jolivet PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 247307e90273SPierre Jolivet A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 24746718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 24756718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_AtB; 24763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 24774222ddf1SHong Zhang } 24784222ddf1SHong Zhang 2479d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2480d71ae5a4SJacob Faibussowitsch { 24814222ddf1SHong Zhang Mat_Product *product = C->product; 24824222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv", "cyclic"}; 24834222ddf1SHong Zhang PetscInt alg, nalg = 2; 24844222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 24854222ddf1SHong Zhang 24864222ddf1SHong Zhang PetscFunctionBegin; 24874222ddf1SHong Zhang /* Set default algorithm */ 24884222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 24899566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 249048a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 24914222ddf1SHong Zhang 24924222ddf1SHong Zhang /* Get runtime option */ 24934222ddf1SHong Zhang if (product->api_user) { 2494d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 24959566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2496d0609cedSBarry Smith PetscOptionsEnd(); 24974222ddf1SHong Zhang } else { 2498d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 24999566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2500d0609cedSBarry Smith PetscOptionsEnd(); 25014222ddf1SHong Zhang } 250248a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 25034222ddf1SHong Zhang 25044222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 25054222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 25063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25074222ddf1SHong Zhang } 25084222ddf1SHong Zhang 25095d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2510d71ae5a4SJacob Faibussowitsch { 25114222ddf1SHong Zhang Mat_Product *product = C->product; 25124222ddf1SHong Zhang 25134222ddf1SHong Zhang PetscFunctionBegin; 25144222ddf1SHong Zhang switch (product->type) { 2515d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AB: 2516d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2517d71ae5a4SJacob Faibussowitsch break; 2518d71ae5a4SJacob Faibussowitsch case MATPRODUCT_AtB: 2519d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2520d71ae5a4SJacob Faibussowitsch break; 2521d71ae5a4SJacob Faibussowitsch case MATPRODUCT_ABt: 2522d71ae5a4SJacob Faibussowitsch PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2523d71ae5a4SJacob Faibussowitsch break; 2524d71ae5a4SJacob Faibussowitsch default: 2525d71ae5a4SJacob Faibussowitsch break; 25264222ddf1SHong Zhang } 25273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25284222ddf1SHong Zhang } 2529