xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0be0d8bde7cbed9986facbcd3689d3188b7894db)
1ed3cc1f0SBarry Smith /*
2ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
347d993e7Ssuyashtn    Portions of this code are under:
447d993e7Ssuyashtn    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5ed3cc1f0SBarry Smith */
6ed3cc1f0SBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
9baa3c1c6SHong Zhang #include <petscblaslapack.h>
108965ea79SLois Curfman McInnes 
11ab92ecdeSBarry Smith /*@
1211a5261eSBarry Smith   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13ab92ecdeSBarry Smith   matrix that represents the operator. For sequential matrices it returns itself.
14ab92ecdeSBarry Smith 
15ab92ecdeSBarry Smith   Input Parameter:
162ef1f0ffSBarry Smith . A - the sequential or MPI `MATDENSE` matrix
17ab92ecdeSBarry Smith 
18ab92ecdeSBarry Smith   Output Parameter:
19ab92ecdeSBarry Smith . B - the inner matrix
20ab92ecdeSBarry Smith 
218e6c10adSSatish Balay   Level: intermediate
228e6c10adSSatish Balay 
231cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24ab92ecdeSBarry Smith @*/
25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26d71ae5a4SJacob Faibussowitsch {
27ab92ecdeSBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28ace3abfcSBarry Smith   PetscBool     flg;
29ab92ecdeSBarry Smith 
30ab92ecdeSBarry Smith   PetscFunctionBegin;
31d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
324f572ea9SToby Isaac   PetscAssertPointer(B, 2);
339566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
342205254eSKarl Rupp   if (flg) *B = mat->A;
35f140bc17SStefano Zampini   else {
369566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
3728b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38f140bc17SStefano Zampini     *B = A;
39f140bc17SStefano Zampini   }
403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41ab92ecdeSBarry Smith }
42ab92ecdeSBarry Smith 
4366976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44d71ae5a4SJacob Faibussowitsch {
45a76f77c3SStefano Zampini   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46a76f77c3SStefano Zampini   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47a76f77c3SStefano Zampini 
48a76f77c3SStefano Zampini   PetscFunctionBegin;
499566063dSJacob Faibussowitsch   PetscCall(MatCopy(Amat->A, Bmat->A, s));
503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
51a76f77c3SStefano Zampini }
52a76f77c3SStefano Zampini 
53d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54d71ae5a4SJacob Faibussowitsch {
552f605a99SJose E. Roman   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
562f605a99SJose E. Roman   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
572f605a99SJose E. Roman   PetscScalar  *v;
582f605a99SJose E. Roman 
592f605a99SJose E. Roman   PetscFunctionBegin;
602f605a99SJose E. Roman   PetscCall(MatDenseGetArray(mat->A, &v));
612f605a99SJose E. Roman   PetscCall(MatDenseGetLDA(mat->A, &lda));
622f605a99SJose E. Roman   rend2 = PetscMin(rend, A->cmap->N);
632f605a99SJose E. Roman   if (rend2 > rstart) {
642f605a99SJose E. Roman     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
652f605a99SJose E. Roman     PetscCall(PetscLogFlops(rend2 - rstart));
662f605a99SJose E. Roman   }
672f605a99SJose E. Roman   PetscCall(MatDenseRestoreArray(mat->A, &v));
683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
692f605a99SJose E. Roman }
702f605a99SJose E. Roman 
7166976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72d71ae5a4SJacob Faibussowitsch {
73ba8c8a56SBarry Smith   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74d0f46423SBarry Smith   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75ba8c8a56SBarry Smith 
76ba8c8a56SBarry Smith   PetscFunctionBegin;
77aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78ba8c8a56SBarry Smith   lrow = row - rstart;
799566063dSJacob Faibussowitsch   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81ba8c8a56SBarry Smith }
82ba8c8a56SBarry Smith 
8366976f2fSJacob Faibussowitsch static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84d71ae5a4SJacob Faibussowitsch {
85637a0070SStefano Zampini   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86637a0070SStefano Zampini   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87ba8c8a56SBarry Smith 
88ba8c8a56SBarry Smith   PetscFunctionBegin;
89aed4548fSBarry Smith   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90637a0070SStefano Zampini   lrow = row - rstart;
919566063dSJacob Faibussowitsch   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
93ba8c8a56SBarry Smith }
94ba8c8a56SBarry Smith 
9566976f2fSJacob Faibussowitsch static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96d71ae5a4SJacob Faibussowitsch {
970de54da6SSatish Balay   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98d0f46423SBarry Smith   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
9987828ca2SBarry Smith   PetscScalar  *array;
1000de54da6SSatish Balay   MPI_Comm      comm;
1014b6ae80fSStefano Zampini   PetscBool     flg;
10211bd1e4dSLisandro Dalcin   Mat           B;
1030de54da6SSatish Balay 
1040de54da6SSatish Balay   PetscFunctionBegin;
1059566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &flg));
10628b400f6SJacob Faibussowitsch   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
1079566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA)
1109566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
11128b400f6SJacob Faibussowitsch     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP)
11347d993e7Ssuyashtn     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
11447d993e7Ssuyashtn     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
11547d993e7Ssuyashtn #endif
116f4f49eeaSPierre Jolivet     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
1179566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &B));
1189566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, m, m, m, m));
1199566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
1209566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
1219566063dSJacob Faibussowitsch     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
1229566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
1239566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
12411bd1e4dSLisandro Dalcin     *a = B;
1259566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
1262205254eSKarl Rupp   } else *a = B;
1273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1280de54da6SSatish Balay }
1290de54da6SSatish Balay 
13066976f2fSJacob Faibussowitsch static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131d71ae5a4SJacob Faibussowitsch {
13239b7565bSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134ace3abfcSBarry Smith   PetscBool     roworiented = A->roworiented;
1358965ea79SLois Curfman McInnes 
1363a40ed3dSBarry Smith   PetscFunctionBegin;
1378965ea79SLois Curfman McInnes   for (i = 0; i < m; i++) {
1385ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
13908401ef6SPierre Jolivet     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
1408965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1418965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
14239b7565bSBarry Smith       if (roworiented) {
143a80ceddfSStefano Zampini         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
1443a40ed3dSBarry Smith       } else {
1458965ea79SLois Curfman McInnes         for (j = 0; j < n; j++) {
1465ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
14708401ef6SPierre Jolivet           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148a80ceddfSStefano Zampini           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
14939b7565bSBarry Smith         }
1508965ea79SLois Curfman McInnes       }
1512205254eSKarl Rupp     } else if (!A->donotstash) {
1525080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
15339b7565bSBarry Smith       if (roworiented) {
154a80ceddfSStefano Zampini         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
155d36fbae8SSatish Balay       } else {
156a80ceddfSStefano Zampini         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
15739b7565bSBarry Smith       }
158b49de8d1SLois Curfman McInnes     }
159b49de8d1SLois Curfman McInnes   }
1603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
161b49de8d1SLois Curfman McInnes }
162b49de8d1SLois Curfman McInnes 
16366976f2fSJacob Faibussowitsch static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164d71ae5a4SJacob Faibussowitsch {
165b49de8d1SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166d0f46423SBarry Smith   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167b49de8d1SLois Curfman McInnes 
1683a40ed3dSBarry Smith   PetscFunctionBegin;
169b49de8d1SLois Curfman McInnes   for (i = 0; i < m; i++) {
17054c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
17154c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
173b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
174b49de8d1SLois Curfman McInnes       for (j = 0; j < n; j++) {
17554c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
17654c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
1779566063dSJacob Faibussowitsch         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178b49de8d1SLois Curfman McInnes       }
179e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
1808965ea79SLois Curfman McInnes   }
1813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1828965ea79SLois Curfman McInnes }
1838965ea79SLois Curfman McInnes 
184d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185d71ae5a4SJacob Faibussowitsch {
18649a6ff4bSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
18749a6ff4bSBarry Smith 
18849a6ff4bSBarry Smith   PetscFunctionBegin;
1899566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, lda));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19149a6ff4bSBarry Smith }
19249a6ff4bSBarry Smith 
193d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194d71ae5a4SJacob Faibussowitsch {
195ad16ce7aSStefano Zampini   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
19647d993e7Ssuyashtn   MatType       mtype = MATSEQDENSE;
197ad16ce7aSStefano Zampini 
198ad16ce7aSStefano Zampini   PetscFunctionBegin;
19917359960SJose E. Roman   if (!a->A) {
20028b400f6SJacob Faibussowitsch     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2019566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->rmap));
2029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(A->cmap));
2039566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
2049566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205a51cd166SPierre Jolivet #if PetscDefined(HAVE_CUDA)
20647d993e7Ssuyashtn     PetscBool iscuda;
2079566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
20847d993e7Ssuyashtn     if (iscuda) mtype = MATSEQDENSECUDA;
209a51cd166SPierre Jolivet #elif PetscDefined(HAVE_HIP)
21047d993e7Ssuyashtn     PetscBool iship;
21147d993e7Ssuyashtn     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
21247d993e7Ssuyashtn     if (iship) mtype = MATSEQDENSEHIP;
21347d993e7Ssuyashtn #endif
21447d993e7Ssuyashtn     PetscCall(MatSetType(a->A, mtype));
21517359960SJose E. Roman   }
2169566063dSJacob Faibussowitsch   PetscCall(MatDenseSetLDA(a->A, lda));
2173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
218ad16ce7aSStefano Zampini }
219ad16ce7aSStefano Zampini 
220d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221d71ae5a4SJacob Faibussowitsch {
222ff14e315SSatish Balay   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223ff14e315SSatish Balay 
2243a40ed3dSBarry Smith   PetscFunctionBegin;
22528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2269566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, array));
2273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
228ff14e315SSatish Balay }
229ff14e315SSatish Balay 
230f1e99121SPierre Jolivet static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231d71ae5a4SJacob Faibussowitsch {
2328572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2338572280aSBarry Smith 
2348572280aSBarry Smith   PetscFunctionBegin;
23528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236f1e99121SPierre Jolivet   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
2373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2388572280aSBarry Smith }
2398572280aSBarry Smith 
240d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241d71ae5a4SJacob Faibussowitsch {
2426947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2436947451fSStefano Zampini 
2446947451fSStefano Zampini   PetscFunctionBegin;
24528b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2469566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, array));
2473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2486947451fSStefano Zampini }
2496947451fSStefano Zampini 
250d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251d71ae5a4SJacob Faibussowitsch {
252d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253d3042a70SBarry Smith 
254d3042a70SBarry Smith   PetscFunctionBegin;
25528b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
25628b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2579566063dSJacob Faibussowitsch   PetscCall(MatDensePlaceArray(a->A, array));
2583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259d3042a70SBarry Smith }
260d3042a70SBarry Smith 
261d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262d71ae5a4SJacob Faibussowitsch {
263d3042a70SBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264d3042a70SBarry Smith 
265d3042a70SBarry Smith   PetscFunctionBegin;
26628b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
26728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2689566063dSJacob Faibussowitsch   PetscCall(MatDenseResetArray(a->A));
2693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
270d3042a70SBarry Smith }
271d3042a70SBarry Smith 
272d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273d71ae5a4SJacob Faibussowitsch {
274d5ea218eSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275d5ea218eSStefano Zampini 
276d5ea218eSStefano Zampini   PetscFunctionBegin;
27728b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
27828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2799566063dSJacob Faibussowitsch   PetscCall(MatDenseReplaceArray(a->A, array));
2803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
281d5ea218eSStefano Zampini }
282d5ea218eSStefano Zampini 
283d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284d71ae5a4SJacob Faibussowitsch {
285ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286637a0070SStefano Zampini   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
2875d0c19d7SBarry Smith   const PetscInt    *irow, *icol;
288637a0070SStefano Zampini   const PetscScalar *v;
289637a0070SStefano Zampini   PetscScalar       *bv;
290ca3fa75bSLois Curfman McInnes   Mat                newmat;
2914aa3045dSJed Brown   IS                 iscol_local;
29242a884f0SBarry Smith   MPI_Comm           comm_is, comm_mat;
293ca3fa75bSLois Curfman McInnes 
294ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
2959566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
2969566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
29708401ef6SPierre Jolivet   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
29842a884f0SBarry Smith 
2999566063dSJacob Faibussowitsch   PetscCall(ISAllGather(iscol, &iscol_local));
3009566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &irow));
3019566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &icol));
3029566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &nrows));
3039566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &ncols));
3049566063dSJacob Faibussowitsch   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
305ca3fa75bSLois Curfman McInnes 
306ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
307da81f932SPierre Jolivet      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
3087eba5e9cSLois Curfman McInnes      original matrix! */
309ca3fa75bSLois Curfman McInnes 
3109566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
3119566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
312ca3fa75bSLois Curfman McInnes 
313ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
314ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
315e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
3167eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
317ca3fa75bSLois Curfman McInnes     newmat = *B;
318ca3fa75bSLois Curfman McInnes   } else {
319ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
3209566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
3219566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
3229566063dSJacob Faibussowitsch     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
3239566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324ca3fa75bSLois Curfman McInnes   }
325ca3fa75bSLois Curfman McInnes 
326ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
327ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense *)newmat->data;
3289566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(newmatd->A, &bv));
3299566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mat->A, &v));
3309566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mat->A, &lda));
3314aa3045dSJed Brown   for (i = 0; i < Ncols; i++) {
332637a0070SStefano Zampini     const PetscScalar *av = v + lda * icol[i];
333ad540459SPierre Jolivet     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334ca3fa75bSLois Curfman McInnes   }
3359566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
3369566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
337ca3fa75bSLois Curfman McInnes 
338ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
3399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
3409566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
341ca3fa75bSLois Curfman McInnes 
342ca3fa75bSLois Curfman McInnes   /* Free work space */
3439566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &irow));
3449566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &icol));
3459566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&iscol_local));
346ca3fa75bSLois Curfman McInnes   *B = newmat;
3473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
348ca3fa75bSLois Curfman McInnes }
349ca3fa75bSLois Curfman McInnes 
35066976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351d71ae5a4SJacob Faibussowitsch {
35273a71a0fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
35373a71a0fSBarry Smith 
3543a40ed3dSBarry Smith   PetscFunctionBegin;
3559566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, array));
3563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
357ff14e315SSatish Balay }
358ff14e315SSatish Balay 
359f1e99121SPierre Jolivet static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360d71ae5a4SJacob Faibussowitsch {
3618572280aSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3628572280aSBarry Smith 
3638572280aSBarry Smith   PetscFunctionBegin;
364f1e99121SPierre Jolivet   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
3653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3668572280aSBarry Smith }
3678572280aSBarry Smith 
36866976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369d71ae5a4SJacob Faibussowitsch {
3706947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
3716947451fSStefano Zampini 
3726947451fSStefano Zampini   PetscFunctionBegin;
3739566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
3743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3756947451fSStefano Zampini }
3766947451fSStefano Zampini 
37766976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378d71ae5a4SJacob Faibussowitsch {
37939ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
38013f74950SBarry Smith   PetscInt      nstash, reallocs;
3818965ea79SLois Curfman McInnes 
3823a40ed3dSBarry Smith   PetscFunctionBegin;
3833ba16761SJacob Faibussowitsch   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
3848965ea79SLois Curfman McInnes 
3859566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
3869566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
3879566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
3883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3898965ea79SLois Curfman McInnes }
3908965ea79SLois Curfman McInnes 
39166976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392d71ae5a4SJacob Faibussowitsch {
39339ddd567SLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
39413f74950SBarry Smith   PetscInt      i, *row, *col, flg, j, rstart, ncols;
39513f74950SBarry Smith   PetscMPIInt   n;
39687828ca2SBarry Smith   PetscScalar  *val;
3978965ea79SLois Curfman McInnes 
3983a40ed3dSBarry Smith   PetscFunctionBegin;
399910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
4008965ea79SLois Curfman McInnes     /*  wait on receives */
4017ef1d9bdSSatish Balay     while (1) {
4029566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
4037ef1d9bdSSatish Balay       if (!flg) break;
4048965ea79SLois Curfman McInnes 
4057ef1d9bdSSatish Balay       for (i = 0; i < n;) {
4067ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
4072205254eSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
4082205254eSKarl Rupp           if (row[j] != rstart) break;
4092205254eSKarl Rupp         }
4107ef1d9bdSSatish Balay         if (j < n) ncols = j - i;
4117ef1d9bdSSatish Balay         else ncols = n - i;
4127ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
4139566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
4147ef1d9bdSSatish Balay         i = j;
4158965ea79SLois Curfman McInnes       }
4167ef1d9bdSSatish Balay     }
4179566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418910cf402Sprj-   }
4198965ea79SLois Curfman McInnes 
4209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mdn->A, mode));
4219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mdn->A, mode));
4223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4238965ea79SLois Curfman McInnes }
4248965ea79SLois Curfman McInnes 
42566976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426d71ae5a4SJacob Faibussowitsch {
42739ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
4283a40ed3dSBarry Smith 
4293a40ed3dSBarry Smith   PetscFunctionBegin;
4309566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
4313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4328965ea79SLois Curfman McInnes }
4338965ea79SLois Curfman McInnes 
43466976f2fSJacob Faibussowitsch static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435d71ae5a4SJacob Faibussowitsch {
43639ddd567SLois Curfman McInnes   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437637a0070SStefano Zampini   PetscInt      i, len, *lrows;
438637a0070SStefano Zampini 
439637a0070SStefano Zampini   PetscFunctionBegin;
440637a0070SStefano Zampini   /* get locally owned rows */
4419566063dSJacob Faibussowitsch   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
443637a0070SStefano Zampini   if (x && b) {
44497b48c8fSBarry Smith     const PetscScalar *xx;
44597b48c8fSBarry Smith     PetscScalar       *bb;
4468965ea79SLois Curfman McInnes 
4479566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
4489566063dSJacob Faibussowitsch     PetscCall(VecGetArrayWrite(b, &bb));
449637a0070SStefano Zampini     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
4509566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
4519566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayWrite(b, &bb));
45297b48c8fSBarry Smith   }
4539566063dSJacob Faibussowitsch   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454e2eb51b1SBarry Smith   if (diag != 0.0) {
455637a0070SStefano Zampini     Vec d;
456b9679d65SBarry Smith 
4579566063dSJacob Faibussowitsch     PetscCall(MatCreateVecs(A, NULL, &d));
4589566063dSJacob Faibussowitsch     PetscCall(VecSet(d, diag));
4599566063dSJacob Faibussowitsch     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
4609566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&d));
461b9679d65SBarry Smith   }
4629566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
4633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4648965ea79SLois Curfman McInnes }
4658965ea79SLois Curfman McInnes 
466*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469*0be0d8bdSHansol Suh PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470*0be0d8bdSHansol Suh 
47166976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472d71ae5a4SJacob Faibussowitsch {
47339ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474637a0070SStefano Zampini   const PetscScalar *ax;
475637a0070SStefano Zampini   PetscScalar       *ay;
476d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
477c456f294SBarry Smith 
4783a40ed3dSBarry Smith   PetscFunctionBegin;
4796f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
4809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
4819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
4829566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
4839566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
4849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
4859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
4869566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
4873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4888965ea79SLois Curfman McInnes }
4898965ea79SLois Curfman McInnes 
490*0be0d8bdSHansol Suh static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
491*0be0d8bdSHansol Suh {
492*0be0d8bdSHansol Suh   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
493*0be0d8bdSHansol Suh   const PetscScalar *ax;
494*0be0d8bdSHansol Suh   PetscScalar       *ay;
495*0be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
496*0be0d8bdSHansol Suh 
497*0be0d8bdSHansol Suh   PetscFunctionBegin;
498*0be0d8bdSHansol Suh   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499*0be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500*0be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501*0be0d8bdSHansol Suh   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502*0be0d8bdSHansol Suh   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505*0be0d8bdSHansol Suh   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
506*0be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
507*0be0d8bdSHansol Suh }
508*0be0d8bdSHansol Suh 
50966976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
510d71ae5a4SJacob Faibussowitsch {
51139ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
512637a0070SStefano Zampini   const PetscScalar *ax;
513637a0070SStefano Zampini   PetscScalar       *ay;
514d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
515c456f294SBarry Smith 
5163a40ed3dSBarry Smith   PetscFunctionBegin;
5176f08ad05SPierre Jolivet   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
5189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
5199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
5209566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
5219566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
5229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
5239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
5249566063dSJacob Faibussowitsch   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
5253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5268965ea79SLois Curfman McInnes }
5278965ea79SLois Curfman McInnes 
528*0be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
529*0be0d8bdSHansol Suh {
530*0be0d8bdSHansol Suh   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
531*0be0d8bdSHansol Suh   const PetscScalar *ax;
532*0be0d8bdSHansol Suh   PetscScalar       *ay;
533*0be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
534*0be0d8bdSHansol Suh 
535*0be0d8bdSHansol Suh   PetscFunctionBegin;
536*0be0d8bdSHansol Suh   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
537*0be0d8bdSHansol Suh   PetscCall(VecSet(yy, 0.0));
538*0be0d8bdSHansol Suh   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
539*0be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
540*0be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
541*0be0d8bdSHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
542*0be0d8bdSHansol Suh   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
543*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
544*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
545*0be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
546*0be0d8bdSHansol Suh }
547*0be0d8bdSHansol Suh 
548459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
549d71ae5a4SJacob Faibussowitsch {
550096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
551637a0070SStefano Zampini   const PetscScalar *ax;
552637a0070SStefano Zampini   PetscScalar       *ay;
553d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
5544d86920dSPierre Jolivet 
5553a40ed3dSBarry Smith   PetscFunctionBegin;
5566f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
5579566063dSJacob Faibussowitsch   PetscCall(VecSet(yy, 0.0));
558459e8d23SBlanca Mellado Pinto   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
559459e8d23SBlanca Mellado Pinto   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
5609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
5619566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
5629566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
5639566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
5649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
5659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
5663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
567096963f5SLois Curfman McInnes }
568096963f5SLois Curfman McInnes 
569*0be0d8bdSHansol Suh static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
570*0be0d8bdSHansol Suh {
571*0be0d8bdSHansol Suh   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
572*0be0d8bdSHansol Suh   const PetscScalar *ax;
573*0be0d8bdSHansol Suh   PetscScalar       *ay;
574*0be0d8bdSHansol Suh   PetscMemType       axmtype, aymtype;
575*0be0d8bdSHansol Suh 
576*0be0d8bdSHansol Suh   PetscFunctionBegin;
577*0be0d8bdSHansol Suh   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
578*0be0d8bdSHansol Suh   PetscCall(VecCopy(yy, zz));
579*0be0d8bdSHansol Suh   PetscMPIInt rank;
580*0be0d8bdSHansol Suh   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
581*0be0d8bdSHansol Suh   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
582*0be0d8bdSHansol Suh   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
583*0be0d8bdSHansol Suh   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
584*0be0d8bdSHansol Suh   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
585*0be0d8bdSHansol Suh   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
586*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
587*0be0d8bdSHansol Suh   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
588*0be0d8bdSHansol Suh   PetscFunctionReturn(PETSC_SUCCESS);
589*0be0d8bdSHansol Suh }
590*0be0d8bdSHansol Suh 
591459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
592d71ae5a4SJacob Faibussowitsch {
593096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
594637a0070SStefano Zampini   const PetscScalar *ax;
595637a0070SStefano Zampini   PetscScalar       *ay;
596d0295fc0SJunchao Zhang   PetscMemType       axmtype, aymtype;
597096963f5SLois Curfman McInnes 
5983a40ed3dSBarry Smith   PetscFunctionBegin;
5996f08ad05SPierre Jolivet   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
6009566063dSJacob Faibussowitsch   PetscCall(VecCopy(yy, zz));
601459e8d23SBlanca Mellado Pinto   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
602459e8d23SBlanca Mellado Pinto   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
6039566063dSJacob Faibussowitsch   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
6049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
6059566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
6069566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
6079566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
6089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
6093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
610096963f5SLois Curfman McInnes }
611096963f5SLois Curfman McInnes 
612459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
613459e8d23SBlanca Mellado Pinto {
614459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
615459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
616459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
617459e8d23SBlanca Mellado Pinto }
618459e8d23SBlanca Mellado Pinto 
619459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
620459e8d23SBlanca Mellado Pinto {
621459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
622459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
623459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
624459e8d23SBlanca Mellado Pinto }
625459e8d23SBlanca Mellado Pinto 
626459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
627459e8d23SBlanca Mellado Pinto {
628459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
629459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
630459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
631459e8d23SBlanca Mellado Pinto }
632459e8d23SBlanca Mellado Pinto 
633459e8d23SBlanca Mellado Pinto static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
634459e8d23SBlanca Mellado Pinto {
635459e8d23SBlanca Mellado Pinto   PetscFunctionBegin;
636459e8d23SBlanca Mellado Pinto   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
637459e8d23SBlanca Mellado Pinto   PetscFunctionReturn(PETSC_SUCCESS);
638459e8d23SBlanca Mellado Pinto }
639459e8d23SBlanca Mellado Pinto 
640d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
641d71ae5a4SJacob Faibussowitsch {
64239ddd567SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
64379560b15SJacob Faibussowitsch   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
64479560b15SJacob Faibussowitsch   PetscScalar       *x;
645637a0070SStefano Zampini   const PetscScalar *av;
646ed3cc1f0SBarry Smith 
6473a40ed3dSBarry Smith   PetscFunctionBegin;
6489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
64979560b15SJacob Faibussowitsch   PetscCall(VecGetSize(v, &ng));
65079560b15SJacob Faibussowitsch   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
65179560b15SJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &nl));
652d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
653d0f46423SBarry Smith   radd = A->rmap->rstart * m;
6549566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
6559566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
656ad540459SPierre Jolivet   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
6579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
658*0be0d8bdSHansol Suh   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
6599566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
6603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6618965ea79SLois Curfman McInnes }
6628965ea79SLois Curfman McInnes 
66366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MPIDense(Mat mat)
664d71ae5a4SJacob Faibussowitsch {
6653501a2bdSLois Curfman McInnes   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
666ed3cc1f0SBarry Smith 
6673a40ed3dSBarry Smith   PetscFunctionBegin;
6683ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
6699566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
67028b400f6SJacob Faibussowitsch   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
67128b400f6SJacob Faibussowitsch   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
6729566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->A));
6739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->lvec));
6749566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&mdn->Mvctx));
6759566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mdn->cvec));
6769566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&mdn->cmat));
67701b82886SBarry Smith 
6789566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
6799566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
6808baccfbdSHong Zhang 
6819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
6829566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
6839566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
6849566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
6859566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
6869566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
6879566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
6889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
6899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
6909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
6919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
6929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
6939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
6948baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
6959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
6968baccfbdSHong Zhang #endif
697d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
6989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
699d24d4204SJose E. Roman #endif
7009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
7019566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
7029566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
7036718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
7049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
7059566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
7062e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
7072e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
7082e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
7092e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
7102e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
7112e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
7122e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
7132e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
7142e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
7152e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
7162e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
7172e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
7182e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
7192e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
7202e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
7213d9668e3SJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
7226718818eSStefano Zampini #endif
72347d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
72447d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
72547d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
72647d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
72747d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
72847d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
72947d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
73047d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
73147d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
73247d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
73347d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
73447d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
73547d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
73647d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
73747d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
73847d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
73947d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
74047d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
7413d9668e3SJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
74247d993e7Ssuyashtn #endif
7439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
7449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
7459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
7469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
7479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
7489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
7499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
7509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
7519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
7529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
753*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
754*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
755*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
756f8103e6bSStefano Zampini 
757f8103e6bSStefano Zampini   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
7583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7598965ea79SLois Curfman McInnes }
76039ddd567SLois Curfman McInnes 
7619804daf3SBarry Smith #include <petscdraw.h>
762d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
763d71ae5a4SJacob Faibussowitsch {
76439ddd567SLois Curfman McInnes   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
765616b8fbbSStefano Zampini   PetscMPIInt       rank;
76619fd82e9SBarry Smith   PetscViewerType   vtype;
767ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
768b0a32e0cSBarry Smith   PetscViewer       sviewer;
769f3ef73ceSBarry Smith   PetscViewerFormat format;
7708965ea79SLois Curfman McInnes 
7713a40ed3dSBarry Smith   PetscFunctionBegin;
7729566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7739566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
7749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
77532077d6dSBarry Smith   if (iascii) {
7769566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetType(viewer, &vtype));
7779566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
778456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
7794e220ebcSLois Curfman McInnes       MatInfo info;
7809566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
7819566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
7829371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
7839371c9d4SSatish Balay                                                    (PetscInt)info.memory));
7849566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
7859566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
7866f08ad05SPierre Jolivet       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
7873ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
788fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
7893ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
7908965ea79SLois Curfman McInnes     }
791f1af5d2fSBarry Smith   } else if (isdraw) {
792b0a32e0cSBarry Smith     PetscDraw draw;
793ace3abfcSBarry Smith     PetscBool isnull;
794f1af5d2fSBarry Smith 
7959566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
7969566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
7973ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
798f1af5d2fSBarry Smith   }
79977ed5343SBarry Smith 
8007da1fb6eSBarry Smith   {
8018965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
8028965ea79SLois Curfman McInnes     Mat          A;
803d0f46423SBarry Smith     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
804ba8c8a56SBarry Smith     PetscInt    *cols;
805ba8c8a56SBarry Smith     PetscScalar *vals;
8068965ea79SLois Curfman McInnes 
8079566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
808dd400576SPatrick Sanan     if (rank == 0) {
8099566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
8103a40ed3dSBarry Smith     } else {
8119566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
8128965ea79SLois Curfman McInnes     }
8137adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
8149566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPIDENSE));
8159566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
8168965ea79SLois Curfman McInnes 
81739ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
81839ddd567SLois Curfman McInnes        but it's quick for now */
81951022da4SBarry Smith     A->insertmode = INSERT_VALUES;
8202205254eSKarl Rupp 
8212205254eSKarl Rupp     row = mat->rmap->rstart;
8222205254eSKarl Rupp     m   = mdn->A->rmap->n;
8238965ea79SLois Curfman McInnes     for (i = 0; i < m; i++) {
8249566063dSJacob Faibussowitsch       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
8259566063dSJacob Faibussowitsch       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
8269566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
82739ddd567SLois Curfman McInnes       row++;
8288965ea79SLois Curfman McInnes     }
8298965ea79SLois Curfman McInnes 
8309566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
8319566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
8329566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
833dd400576SPatrick Sanan     if (rank == 0) {
834f4f49eeaSPierre Jolivet       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
835f4f49eeaSPierre Jolivet       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
8368965ea79SLois Curfman McInnes     }
8379566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
8389566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
8398965ea79SLois Curfman McInnes   }
8403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8418965ea79SLois Curfman McInnes }
8428965ea79SLois Curfman McInnes 
84366976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
844d71ae5a4SJacob Faibussowitsch {
845ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw, issocket;
8468965ea79SLois Curfman McInnes 
847433994e6SBarry Smith   PetscFunctionBegin;
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
8499566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
8509566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
8519566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
8520f5bd95cSBarry Smith 
85332077d6dSBarry Smith   if (iascii || issocket || isdraw) {
8549566063dSJacob Faibussowitsch     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
8551baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
8563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8578965ea79SLois Curfman McInnes }
8588965ea79SLois Curfman McInnes 
85966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
860d71ae5a4SJacob Faibussowitsch {
8613501a2bdSLois Curfman McInnes   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
8623501a2bdSLois Curfman McInnes   Mat            mdn = mat->A;
8633966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
8648965ea79SLois Curfman McInnes 
8653a40ed3dSBarry Smith   PetscFunctionBegin;
8664e220ebcSLois Curfman McInnes   info->block_size = 1.0;
8672205254eSKarl Rupp 
8689566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
8692205254eSKarl Rupp 
8709371c9d4SSatish Balay   isend[0] = info->nz_used;
8719371c9d4SSatish Balay   isend[1] = info->nz_allocated;
8729371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
8739371c9d4SSatish Balay   isend[3] = info->memory;
8749371c9d4SSatish Balay   isend[4] = info->mallocs;
8758965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
8764e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
8774e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
8784e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
8794e220ebcSLois Curfman McInnes     info->memory       = isend[3];
8804e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
8818965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
8821c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
8832205254eSKarl Rupp 
8844e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8854e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8864e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8874e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8884e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8898965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
8901c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
8912205254eSKarl Rupp 
8924e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8934e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8944e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8954e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8964e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8978965ea79SLois Curfman McInnes   }
8984e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8994e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9004e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9028965ea79SLois Curfman McInnes }
9038965ea79SLois Curfman McInnes 
90466976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
905d71ae5a4SJacob Faibussowitsch {
90639ddd567SLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
9078965ea79SLois Curfman McInnes 
9083a40ed3dSBarry Smith   PetscFunctionBegin;
90912c028f9SKris Buschelman   switch (op) {
910512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
91112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
91212c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
91343674050SBarry Smith     MatCheckPreallocated(A, 1);
9149566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
91512c028f9SKris Buschelman     break;
91612c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
91743674050SBarry Smith     MatCheckPreallocated(A, 1);
9184e0d8c25SBarry Smith     a->roworiented = flg;
9199566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
92012c028f9SKris Buschelman     break;
9218c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
92213fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
92312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
924d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
925d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
926d71ae5a4SJacob Faibussowitsch     break;
927d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
928d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
929d71ae5a4SJacob Faibussowitsch     break;
93077e54ba9SKris Buschelman   case MAT_SYMMETRIC:
93177e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
9329a4540c5SBarry Smith   case MAT_HERMITIAN:
9339a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
934b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
935b94d7dedSBarry Smith   case MAT_SPD:
936600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
9375d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
938b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
939b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
9409566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
94177e54ba9SKris Buschelman     break;
942d71ae5a4SJacob Faibussowitsch   default:
943d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
9443a40ed3dSBarry Smith   }
9453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9468965ea79SLois Curfman McInnes }
9478965ea79SLois Curfman McInnes 
94866976f2fSJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
949d71ae5a4SJacob Faibussowitsch {
9505b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
951637a0070SStefano Zampini   const PetscScalar *l;
952637a0070SStefano Zampini   PetscScalar        x, *v, *vv, *r;
953637a0070SStefano Zampini   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
9545b2fa520SLois Curfman McInnes 
9555b2fa520SLois Curfman McInnes   PetscFunctionBegin;
9569566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(mdn->A, &vv));
9579566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(mdn->A, &lda));
9589566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, &s2, &s3));
9595b2fa520SLois Curfman McInnes   if (ll) {
9609566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s2a));
96108401ef6SPierre Jolivet     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
9629566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(ll, &l));
9635b2fa520SLois Curfman McInnes     for (i = 0; i < m; i++) {
9645b2fa520SLois Curfman McInnes       x = l[i];
965637a0070SStefano Zampini       v = vv + i;
9669371c9d4SSatish Balay       for (j = 0; j < n; j++) {
9679371c9d4SSatish Balay         (*v) *= x;
9689371c9d4SSatish Balay         v += lda;
9699371c9d4SSatish Balay       }
9705b2fa520SLois Curfman McInnes     }
9719566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(ll, &l));
9729566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
9735b2fa520SLois Curfman McInnes   }
9745b2fa520SLois Curfman McInnes   if (rr) {
975637a0070SStefano Zampini     const PetscScalar *ar;
976637a0070SStefano Zampini 
9779566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s3a));
97808401ef6SPierre Jolivet     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
9799566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(rr, &ar));
9806f08ad05SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
9819566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mdn->lvec, &r));
9829566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
9839566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
9849566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(rr, &ar));
9855b2fa520SLois Curfman McInnes     for (i = 0; i < n; i++) {
9865b2fa520SLois Curfman McInnes       x = r[i];
987637a0070SStefano Zampini       v = vv + i * lda;
9882205254eSKarl Rupp       for (j = 0; j < m; j++) (*v++) *= x;
9895b2fa520SLois Curfman McInnes     }
9909566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mdn->lvec, &r));
9919566063dSJacob Faibussowitsch     PetscCall(PetscLogFlops(1.0 * n * m));
9925b2fa520SLois Curfman McInnes   }
9939566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
9943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9955b2fa520SLois Curfman McInnes }
9965b2fa520SLois Curfman McInnes 
99766976f2fSJacob Faibussowitsch static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
998d71ae5a4SJacob Faibussowitsch {
9993501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
100013f74950SBarry Smith   PetscInt           i, j;
1001616b8fbbSStefano Zampini   PetscMPIInt        size;
1002329f5518SBarry Smith   PetscReal          sum = 0.0;
1003637a0070SStefano Zampini   const PetscScalar *av, *v;
10043501a2bdSLois Curfman McInnes 
10053a40ed3dSBarry Smith   PetscFunctionBegin;
10069566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1007637a0070SStefano Zampini   v = av;
10089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1009616b8fbbSStefano Zampini   if (size == 1) {
10109566063dSJacob Faibussowitsch     PetscCall(MatNorm(mdn->A, type, nrm));
10113501a2bdSLois Curfman McInnes   } else {
10123501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
1013d0f46423SBarry Smith       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
10149371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
10159371c9d4SSatish Balay         v++;
10163501a2bdSLois Curfman McInnes       }
10171c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
10188f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
10199566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
10203a40ed3dSBarry Smith     } else if (type == NORM_1) {
1021329f5518SBarry Smith       PetscReal *tmp, *tmp2;
10229566063dSJacob Faibussowitsch       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
1023064f8208SBarry Smith       *nrm = 0.0;
1024637a0070SStefano Zampini       v    = av;
1025d0f46423SBarry Smith       for (j = 0; j < mdn->A->cmap->n; j++) {
1026d0f46423SBarry Smith         for (i = 0; i < mdn->A->rmap->n; i++) {
10279371c9d4SSatish Balay           tmp[j] += PetscAbsScalar(*v);
10289371c9d4SSatish Balay           v++;
10293501a2bdSLois Curfman McInnes         }
10303501a2bdSLois Curfman McInnes       }
10311c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1032d0f46423SBarry Smith       for (j = 0; j < A->cmap->N; j++) {
1033064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
10343501a2bdSLois Curfman McInnes       }
10359566063dSJacob Faibussowitsch       PetscCall(PetscFree2(tmp, tmp2));
10369566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
10373a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
1038329f5518SBarry Smith       PetscReal ntemp;
10399566063dSJacob Faibussowitsch       PetscCall(MatNorm(mdn->A, type, &ntemp));
10401c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1041ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
10423501a2bdSLois Curfman McInnes   }
10439566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
10443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10453501a2bdSLois Curfman McInnes }
10463501a2bdSLois Curfman McInnes 
104766976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1048d71ae5a4SJacob Faibussowitsch {
10493501a2bdSLois Curfman McInnes   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
10503501a2bdSLois Curfman McInnes   Mat           B;
1051d0f46423SBarry Smith   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1052637a0070SStefano Zampini   PetscInt      j, i, lda;
105387828ca2SBarry Smith   PetscScalar  *v;
10543501a2bdSLois Curfman McInnes 
10553a40ed3dSBarry Smith   PetscFunctionBegin;
10567fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1057cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
10589566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
10599566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
10609566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
10619566063dSJacob Faibussowitsch     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1062637a0070SStefano Zampini   } else B = *matout;
10633501a2bdSLois Curfman McInnes 
10649371c9d4SSatish Balay   m = a->A->rmap->n;
10659371c9d4SSatish Balay   n = a->A->cmap->n;
10669566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
10679566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
10689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rwork));
10693501a2bdSLois Curfman McInnes   for (i = 0; i < m; i++) rwork[i] = rstart + i;
10701acff37aSSatish Balay   for (j = 0; j < n; j++) {
10719566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
10728e3a54c0SPierre Jolivet     v = PetscSafePointerPlusOffset(v, lda);
10733501a2bdSLois Curfman McInnes   }
10749566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
10759566063dSJacob Faibussowitsch   PetscCall(PetscFree(rwork));
10769566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
10779566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1078cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
10793501a2bdSLois Curfman McInnes     *matout = B;
10803501a2bdSLois Curfman McInnes   } else {
10819566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &B));
10823501a2bdSLois Curfman McInnes   }
10833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1084096963f5SLois Curfman McInnes }
1085096963f5SLois Curfman McInnes 
10866849ba73SBarry Smith static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
108752c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
10888965ea79SLois Curfman McInnes 
108966976f2fSJacob Faibussowitsch static PetscErrorCode MatSetUp_MPIDense(Mat A)
1090d71ae5a4SJacob Faibussowitsch {
1091273d9f13SBarry Smith   PetscFunctionBegin;
10929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->rmap));
10939566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(A->cmap));
109448a46eb9SPierre Jolivet   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
10953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1096273d9f13SBarry Smith }
1097273d9f13SBarry Smith 
109866976f2fSJacob Faibussowitsch static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1099d71ae5a4SJacob Faibussowitsch {
1100488007eeSBarry Smith   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1101488007eeSBarry Smith 
1102488007eeSBarry Smith   PetscFunctionBegin;
11039566063dSJacob Faibussowitsch   PetscCall(MatAXPY(A->A, alpha, B->A, str));
11043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1105488007eeSBarry Smith }
1106488007eeSBarry Smith 
110766976f2fSJacob Faibussowitsch static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1108d71ae5a4SJacob Faibussowitsch {
1109ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1110ba337c44SJed Brown 
1111ba337c44SJed Brown   PetscFunctionBegin;
11129566063dSJacob Faibussowitsch   PetscCall(MatConjugate(a->A));
11133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1114ba337c44SJed Brown }
1115ba337c44SJed Brown 
111666976f2fSJacob Faibussowitsch static PetscErrorCode MatRealPart_MPIDense(Mat A)
1117d71ae5a4SJacob Faibussowitsch {
1118ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1119ba337c44SJed Brown 
1120ba337c44SJed Brown   PetscFunctionBegin;
11219566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
11223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1123ba337c44SJed Brown }
1124ba337c44SJed Brown 
112566976f2fSJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1126d71ae5a4SJacob Faibussowitsch {
1127ba337c44SJed Brown   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1128ba337c44SJed Brown 
1129ba337c44SJed Brown   PetscFunctionBegin;
11309566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
11313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1132ba337c44SJed Brown }
1133ba337c44SJed Brown 
1134d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1135d71ae5a4SJacob Faibussowitsch {
1136637a0070SStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
113749a6ff4bSBarry Smith 
113849a6ff4bSBarry Smith   PetscFunctionBegin;
113928b400f6SJacob Faibussowitsch   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
114028b400f6SJacob Faibussowitsch   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
11419566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
11423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
114349a6ff4bSBarry Smith }
114449a6ff4bSBarry Smith 
1145857cbf51SRichard Tran Mills PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
114652c5f739Sprj- 
114766976f2fSJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1148d71ae5a4SJacob Faibussowitsch {
1149a873a8cdSSam Reynolds   PetscInt      i, m, n;
11500716a85fSBarry Smith   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
11510716a85fSBarry Smith   PetscReal    *work;
11520716a85fSBarry Smith 
11530716a85fSBarry Smith   PetscFunctionBegin;
11549566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
11559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &work));
1156857cbf51SRichard Tran Mills   if (type == REDUCTION_MEAN_REALPART) {
11579566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1158857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
11599566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1160a873a8cdSSam Reynolds   } else {
11619566063dSJacob Faibussowitsch     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1162a873a8cdSSam Reynolds   }
1163857cbf51SRichard Tran Mills   if (type == NORM_2) {
11640716a85fSBarry Smith     for (i = 0; i < n; i++) work[i] *= work[i];
11650716a85fSBarry Smith   }
1166857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
11671c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
11680716a85fSBarry Smith   } else {
11691c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
11700716a85fSBarry Smith   }
11719566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
1172857cbf51SRichard Tran Mills   if (type == NORM_2) {
1173a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1174857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1175a873a8cdSSam Reynolds     for (i = 0; i < n; i++) reductions[i] /= m;
11760716a85fSBarry Smith   }
11773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11780716a85fSBarry Smith }
11790716a85fSBarry Smith 
1180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1181d71ae5a4SJacob Faibussowitsch {
118273a71a0fSBarry Smith   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
118373a71a0fSBarry Smith 
118473a71a0fSBarry Smith   PetscFunctionBegin;
11859566063dSJacob Faibussowitsch   PetscCall(MatSetRandom(d->A, rctx));
11863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
11873faff063SStefano Zampini   x->offloadmask = d->A->offloadmask;
11883faff063SStefano Zampini #endif
11893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119073a71a0fSBarry Smith }
119173a71a0fSBarry Smith 
1192d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1193d71ae5a4SJacob Faibussowitsch {
11943b49f96aSBarry Smith   PetscFunctionBegin;
11953b49f96aSBarry Smith   *missing = PETSC_FALSE;
11963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11973b49f96aSBarry Smith }
11983b49f96aSBarry Smith 
11994222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1200cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12016718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1202a1f9a5e6SStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
12036718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
12046718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
12055d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1206cc48ffa7SToby Isaac 
120709dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
120809dc0095SBarry Smith                                        MatGetRow_MPIDense,
120909dc0095SBarry Smith                                        MatRestoreRow_MPIDense,
121009dc0095SBarry Smith                                        MatMult_MPIDense,
121197304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPIDense,
12127c922b88SBarry Smith                                        MatMultTranspose_MPIDense,
12137c922b88SBarry Smith                                        MatMultTransposeAdd_MPIDense,
1214f4259b30SLisandro Dalcin                                        NULL,
1215f4259b30SLisandro Dalcin                                        NULL,
1216f4259b30SLisandro Dalcin                                        NULL,
1217f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1218f4259b30SLisandro Dalcin                                        NULL,
1219f4259b30SLisandro Dalcin                                        NULL,
1220f4259b30SLisandro Dalcin                                        NULL,
122109dc0095SBarry Smith                                        MatTranspose_MPIDense,
122297304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPIDense,
12236e4ee0c6SHong Zhang                                        MatEqual_MPIDense,
122409dc0095SBarry Smith                                        MatGetDiagonal_MPIDense,
12255b2fa520SLois Curfman McInnes                                        MatDiagonalScale_MPIDense,
122609dc0095SBarry Smith                                        MatNorm_MPIDense,
122797304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPIDense,
122809dc0095SBarry Smith                                        MatAssemblyEnd_MPIDense,
122909dc0095SBarry Smith                                        MatSetOption_MPIDense,
123009dc0095SBarry Smith                                        MatZeroEntries_MPIDense,
1231d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_MPIDense,
1232f4259b30SLisandro Dalcin                                        NULL,
1233f4259b30SLisandro Dalcin                                        NULL,
1234f4259b30SLisandro Dalcin                                        NULL,
1235f4259b30SLisandro Dalcin                                        NULL,
12364994cf47SJed Brown                                        /* 29*/ MatSetUp_MPIDense,
1237f4259b30SLisandro Dalcin                                        NULL,
1238f4259b30SLisandro Dalcin                                        NULL,
1239c56a70eeSBarry Smith                                        MatGetDiagonalBlock_MPIDense,
1240f4259b30SLisandro Dalcin                                        NULL,
1241d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPIDense,
1242f4259b30SLisandro Dalcin                                        NULL,
1243f4259b30SLisandro Dalcin                                        NULL,
1244f4259b30SLisandro Dalcin                                        NULL,
1245f4259b30SLisandro Dalcin                                        NULL,
1246d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPIDense,
12477dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIDense,
1248f4259b30SLisandro Dalcin                                        NULL,
124909dc0095SBarry Smith                                        MatGetValues_MPIDense,
1250a76f77c3SStefano Zampini                                        MatCopy_MPIDense,
1251f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
125209dc0095SBarry Smith                                        MatScale_MPIDense,
12532f605a99SJose E. Roman                                        MatShift_MPIDense,
1254f4259b30SLisandro Dalcin                                        NULL,
1255f4259b30SLisandro Dalcin                                        NULL,
125673a71a0fSBarry Smith                                        /* 49*/ MatSetRandom_MPIDense,
1257f4259b30SLisandro Dalcin                                        NULL,
1258f4259b30SLisandro Dalcin                                        NULL,
1259f4259b30SLisandro Dalcin                                        NULL,
1260f4259b30SLisandro Dalcin                                        NULL,
1261f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1262f4259b30SLisandro Dalcin                                        NULL,
1263f4259b30SLisandro Dalcin                                        NULL,
1264f4259b30SLisandro Dalcin                                        NULL,
1265f4259b30SLisandro Dalcin                                        NULL,
12667dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1267b9b97703SBarry Smith                                        MatDestroy_MPIDense,
1268b9b97703SBarry Smith                                        MatView_MPIDense,
1269f4259b30SLisandro Dalcin                                        NULL,
1270f4259b30SLisandro Dalcin                                        NULL,
1271f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1272f4259b30SLisandro Dalcin                                        NULL,
1273f4259b30SLisandro Dalcin                                        NULL,
1274f4259b30SLisandro Dalcin                                        NULL,
1275f4259b30SLisandro Dalcin                                        NULL,
1276f4259b30SLisandro Dalcin                                        /* 69*/ NULL,
1277f4259b30SLisandro Dalcin                                        NULL,
1278f4259b30SLisandro Dalcin                                        NULL,
1279f4259b30SLisandro Dalcin                                        NULL,
1280f4259b30SLisandro Dalcin                                        NULL,
1281f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1282f4259b30SLisandro Dalcin                                        NULL,
1283f4259b30SLisandro Dalcin                                        NULL,
1284f4259b30SLisandro Dalcin                                        NULL,
1285f4259b30SLisandro Dalcin                                        NULL,
1286f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1287f4259b30SLisandro Dalcin                                        NULL,
1288f4259b30SLisandro Dalcin                                        NULL,
1289f4259b30SLisandro Dalcin                                        NULL,
12905bba2384SShri Abhyankar                                        /* 83*/ MatLoad_MPIDense,
1291f4259b30SLisandro Dalcin                                        NULL,
1292f4259b30SLisandro Dalcin                                        NULL,
1293f4259b30SLisandro Dalcin                                        NULL,
1294f4259b30SLisandro Dalcin                                        NULL,
1295f4259b30SLisandro Dalcin                                        NULL,
1296f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1297f4259b30SLisandro Dalcin                                        NULL,
1298f4259b30SLisandro Dalcin                                        NULL,
1299f4259b30SLisandro Dalcin                                        NULL,
1300f4259b30SLisandro Dalcin                                        NULL,
1301f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1302f4259b30SLisandro Dalcin                                        NULL,
13036718818eSStefano Zampini                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1304cc48ffa7SToby Isaac                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1305f4259b30SLisandro Dalcin                                        NULL,
13064222ddf1SHong Zhang                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1307f4259b30SLisandro Dalcin                                        NULL,
1308f4259b30SLisandro Dalcin                                        NULL,
1309ba337c44SJed Brown                                        MatConjugate_MPIDense,
1310f4259b30SLisandro Dalcin                                        NULL,
1311f4259b30SLisandro Dalcin                                        /*104*/ NULL,
1312ba337c44SJed Brown                                        MatRealPart_MPIDense,
1313ba337c44SJed Brown                                        MatImaginaryPart_MPIDense,
1314f4259b30SLisandro Dalcin                                        NULL,
1315f4259b30SLisandro Dalcin                                        NULL,
1316f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1317f4259b30SLisandro Dalcin                                        NULL,
1318f4259b30SLisandro Dalcin                                        NULL,
131949a6ff4bSBarry Smith                                        MatGetColumnVector_MPIDense,
13203b49f96aSBarry Smith                                        MatMissingDiagonal_MPIDense,
1321f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1322f4259b30SLisandro Dalcin                                        NULL,
1323f4259b30SLisandro Dalcin                                        NULL,
1324f4259b30SLisandro Dalcin                                        NULL,
1325f4259b30SLisandro Dalcin                                        NULL,
1326f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1327f4259b30SLisandro Dalcin                                        NULL,
1328459e8d23SBlanca Mellado Pinto                                        MatMultHermitianTranspose_MPIDense,
1329459e8d23SBlanca Mellado Pinto                                        MatMultHermitianTransposeAdd_MPIDense,
1330f4259b30SLisandro Dalcin                                        NULL,
1331f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1332a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIDense,
1333f4259b30SLisandro Dalcin                                        NULL,
1334f4259b30SLisandro Dalcin                                        NULL,
1335f4259b30SLisandro Dalcin                                        NULL,
1336f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1337f4259b30SLisandro Dalcin                                        NULL,
13386718818eSStefano Zampini                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1339cb20be35SHong Zhang                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1340f4259b30SLisandro Dalcin                                        NULL,
1341f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1342f4259b30SLisandro Dalcin                                        NULL,
1343f4259b30SLisandro Dalcin                                        NULL,
1344f4259b30SLisandro Dalcin                                        NULL,
1345f4259b30SLisandro Dalcin                                        NULL,
1346f4259b30SLisandro Dalcin                                        /*139*/ NULL,
1347f4259b30SLisandro Dalcin                                        NULL,
1348f4259b30SLisandro Dalcin                                        NULL,
1349f4259b30SLisandro Dalcin                                        NULL,
1350f4259b30SLisandro Dalcin                                        NULL,
13514222ddf1SHong Zhang                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1352f4259b30SLisandro Dalcin                                        /*145*/ NULL,
1353f4259b30SLisandro Dalcin                                        NULL,
135499a7f59eSMark Adams                                        NULL,
135599a7f59eSMark Adams                                        NULL,
13567fb60732SBarry Smith                                        NULL,
1357dec0b466SHong Zhang                                        /*150*/ NULL,
1358eede4a3fSMark Adams                                        NULL,
1359dec0b466SHong Zhang                                        NULL};
13608965ea79SLois Curfman McInnes 
136166976f2fSJacob Faibussowitsch static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1362d71ae5a4SJacob Faibussowitsch {
1363637a0070SStefano Zampini   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
136447d993e7Ssuyashtn   MatType       mtype = MATSEQDENSE;
1365a23d5eceSKris Buschelman 
1366a23d5eceSKris Buschelman   PetscFunctionBegin;
136728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
13689566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
13699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
1370637a0070SStefano Zampini   if (!a->A) {
13719566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
13729566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1373637a0070SStefano Zampini   }
137447d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA)
137547d993e7Ssuyashtn   PetscBool iscuda;
13769566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
137747d993e7Ssuyashtn   if (iscuda) mtype = MATSEQDENSECUDA;
137847d993e7Ssuyashtn #endif
137947d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
138047d993e7Ssuyashtn   PetscBool iship;
138147d993e7Ssuyashtn   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
138247d993e7Ssuyashtn   if (iship) mtype = MATSEQDENSEHIP;
138347d993e7Ssuyashtn #endif
138447d993e7Ssuyashtn   PetscCall(MatSetType(a->A, mtype));
13859566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
138647d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
13873faff063SStefano Zampini   mat->offloadmask = a->A->offloadmask;
13883faff063SStefano Zampini #endif
1389637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
13906f08ad05SPierre Jolivet   mat->assembled    = PETSC_TRUE;
13913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1392a23d5eceSKris Buschelman }
1393a23d5eceSKris Buschelman 
1394d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1395d71ae5a4SJacob Faibussowitsch {
13964e29119aSPierre Jolivet   Mat B, C;
13974e29119aSPierre Jolivet 
13984e29119aSPierre Jolivet   PetscFunctionBegin;
13999566063dSJacob Faibussowitsch   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
14009566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
14019566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&C));
14024e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14034e29119aSPierre Jolivet     C = *newmat;
14044e29119aSPierre Jolivet   } else C = NULL;
14059566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14069566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14074e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14089566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14094e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14114e29119aSPierre Jolivet }
14124e29119aSPierre Jolivet 
141366976f2fSJacob Faibussowitsch static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1414d71ae5a4SJacob Faibussowitsch {
14154e29119aSPierre Jolivet   Mat B, C;
14164e29119aSPierre Jolivet 
14174e29119aSPierre Jolivet   PetscFunctionBegin;
14189566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLocalMatrix(A, &C));
14199566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
14204e29119aSPierre Jolivet   if (reuse == MAT_REUSE_MATRIX) {
14214e29119aSPierre Jolivet     C = *newmat;
14224e29119aSPierre Jolivet   } else C = NULL;
14239566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
14249566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
14254e29119aSPierre Jolivet   if (reuse == MAT_INPLACE_MATRIX) {
14269566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &C));
14274e29119aSPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
14283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14294e29119aSPierre Jolivet }
14304e29119aSPierre Jolivet 
143165b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1432d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1433d71ae5a4SJacob Faibussowitsch {
1434f709e8d7SPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
14358ea901baSHong Zhang   Mat           mat_elemental;
143632d7a744SHong Zhang   PetscScalar  *v;
1437f709e8d7SPierre Jolivet   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
14388ea901baSHong Zhang 
14398baccfbdSHong Zhang   PetscFunctionBegin;
1440378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1441378336b6SHong Zhang     mat_elemental = *newmat;
14429566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(*newmat));
1443378336b6SHong Zhang   } else {
14449566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
14459566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
14469566063dSJacob Faibussowitsch     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
14479566063dSJacob Faibussowitsch     PetscCall(MatSetUp(mat_elemental));
14489566063dSJacob Faibussowitsch     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1449378336b6SHong Zhang   }
1450378336b6SHong Zhang 
14519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &rows, N, &cols));
145232d7a744SHong Zhang   for (i = 0; i < N; i++) cols[i] = i;
145332d7a744SHong Zhang   for (i = 0; i < m; i++) rows[i] = rstart + i;
14548ea901baSHong Zhang 
1455637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
14569566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(A, &v));
1457f709e8d7SPierre Jolivet   PetscCall(MatDenseGetLDA(a->A, &lda));
1458f709e8d7SPierre Jolivet   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1459f709e8d7SPierre Jolivet   else {
1460f709e8d7SPierre Jolivet     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1461f709e8d7SPierre Jolivet   }
14629566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
14639566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
14649566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(A, &v));
14659566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, cols));
14668ea901baSHong Zhang 
1467511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
14689566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &mat_elemental));
14698ea901baSHong Zhang   } else {
14708ea901baSHong Zhang     *newmat = mat_elemental;
14718ea901baSHong Zhang   }
14723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14738baccfbdSHong Zhang }
147465b80a83SHong Zhang #endif
14758baccfbdSHong Zhang 
1476d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1477d71ae5a4SJacob Faibussowitsch {
147886aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
147986aefd0dSHong Zhang 
148086aefd0dSHong Zhang   PetscFunctionBegin;
14819566063dSJacob Faibussowitsch   PetscCall(MatDenseGetColumn(mat->A, col, vals));
14823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
148386aefd0dSHong Zhang }
148486aefd0dSHong Zhang 
1485d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1486d71ae5a4SJacob Faibussowitsch {
148786aefd0dSHong Zhang   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
148886aefd0dSHong Zhang 
148986aefd0dSHong Zhang   PetscFunctionBegin;
14909566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreColumn(mat->A, vals));
14913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
149286aefd0dSHong Zhang }
149386aefd0dSHong Zhang 
1494d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1495d71ae5a4SJacob Faibussowitsch {
149694e2cb23SJakub Kruzik   Mat_MPIDense *mat;
149794e2cb23SJakub Kruzik   PetscInt      m, nloc, N;
149894e2cb23SJakub Kruzik 
149994e2cb23SJakub Kruzik   PetscFunctionBegin;
15009566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
15019566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
150294e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
150394e2cb23SJakub Kruzik     PetscInt sum;
150494e2cb23SJakub Kruzik 
150548a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
150694e2cb23SJakub Kruzik     /* Check sum(n) = N */
15071c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
150808401ef6SPierre Jolivet     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
150994e2cb23SJakub Kruzik 
15109566063dSJacob Faibussowitsch     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
15119566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
151294e2cb23SJakub Kruzik   }
151394e2cb23SJakub Kruzik 
151494e2cb23SJakub Kruzik   /* numeric phase */
151594e2cb23SJakub Kruzik   mat = (Mat_MPIDense *)(*outmat)->data;
15169566063dSJacob Faibussowitsch   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
15173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
151894e2cb23SJakub Kruzik }
151994e2cb23SJakub Kruzik 
1520d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1521d71ae5a4SJacob Faibussowitsch {
15226947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15236947451fSStefano Zampini   PetscInt      lda;
15246947451fSStefano Zampini 
15256947451fSStefano Zampini   PetscFunctionBegin;
152628b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
152728b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1528d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15296947451fSStefano Zampini   a->vecinuse = col + 1;
15309566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15319566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
15328e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15336947451fSStefano Zampini   *v = a->cvec;
15343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15356947451fSStefano Zampini }
15366947451fSStefano Zampini 
1537d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1538d71ae5a4SJacob Faibussowitsch {
15396947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15406947451fSStefano Zampini 
15416947451fSStefano Zampini   PetscFunctionBegin;
154228b400f6SJacob Faibussowitsch   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
154328b400f6SJacob Faibussowitsch   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
15446947451fSStefano Zampini   a->vecinuse = 0;
15459566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
15469566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1547742765d3SMatthew Knepley   if (v) *v = NULL;
15483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15496947451fSStefano Zampini }
15506947451fSStefano Zampini 
1551d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1552d71ae5a4SJacob Faibussowitsch {
15536947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15546947451fSStefano Zampini   PetscInt      lda;
15556947451fSStefano Zampini 
15566947451fSStefano Zampini   PetscFunctionBegin;
155728b400f6SJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
155828b400f6SJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1559d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15606947451fSStefano Zampini   a->vecinuse = col + 1;
15619566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15629566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
15638e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15649566063dSJacob Faibussowitsch   PetscCall(VecLockReadPush(a->cvec));
15656947451fSStefano Zampini   *v = a->cvec;
15663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15676947451fSStefano Zampini }
15686947451fSStefano Zampini 
1569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1570d71ae5a4SJacob Faibussowitsch {
15716947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15726947451fSStefano Zampini 
15736947451fSStefano Zampini   PetscFunctionBegin;
15745f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
15755f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
15766947451fSStefano Zampini   a->vecinuse = 0;
15779566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
15789566063dSJacob Faibussowitsch   PetscCall(VecLockReadPop(a->cvec));
15799566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1580742765d3SMatthew Knepley   if (v) *v = NULL;
15813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15826947451fSStefano Zampini }
15836947451fSStefano Zampini 
1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1585d71ae5a4SJacob Faibussowitsch {
15866947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
15876947451fSStefano Zampini   PetscInt      lda;
15886947451fSStefano Zampini 
15896947451fSStefano Zampini   PetscFunctionBegin;
15905f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
15915f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1592d16ceb75SStefano Zampini   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
15936947451fSStefano Zampini   a->vecinuse = col + 1;
15949566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &lda));
15959566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
15968e3a54c0SPierre Jolivet   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
15976947451fSStefano Zampini   *v = a->cvec;
15983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15996947451fSStefano Zampini }
16006947451fSStefano Zampini 
1601d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1602d71ae5a4SJacob Faibussowitsch {
16036947451fSStefano Zampini   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
16046947451fSStefano Zampini 
16056947451fSStefano Zampini   PetscFunctionBegin;
16065f80ce2aSJacob Faibussowitsch   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
16075f80ce2aSJacob Faibussowitsch   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
16086947451fSStefano Zampini   a->vecinuse = 0;
16099566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
16109566063dSJacob Faibussowitsch   PetscCall(VecResetArray(a->cvec));
1611742765d3SMatthew Knepley   if (v) *v = NULL;
16123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16136947451fSStefano Zampini }
16146947451fSStefano Zampini 
161566976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1616d71ae5a4SJacob Faibussowitsch {
16175ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1618616b8fbbSStefano Zampini   Mat_MPIDense *c;
1619616b8fbbSStefano Zampini   MPI_Comm      comm;
1620a2748737SPierre Jolivet   PetscInt      pbegin, pend;
16215ea7661aSPierre Jolivet 
16225ea7661aSPierre Jolivet   PetscFunctionBegin;
16239566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
16245f80ce2aSJacob Faibussowitsch   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
16255f80ce2aSJacob Faibussowitsch   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1626a2748737SPierre Jolivet   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1627a2748737SPierre Jolivet   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
16285ea7661aSPierre Jolivet   if (!a->cmat) {
16299566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &a->cmat));
16309566063dSJacob Faibussowitsch     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1631a2748737SPierre Jolivet     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1632a2748737SPierre Jolivet     else {
1633a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1634a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1635a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1636a2748737SPierre Jolivet     }
16379566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
16389566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1639a2748737SPierre Jolivet   } else {
1640a2748737SPierre Jolivet     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1641a2748737SPierre Jolivet     if (same && a->cmat->rmap->N != A->rmap->N) {
1642a2748737SPierre Jolivet       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1643a2748737SPierre Jolivet       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1644a2748737SPierre Jolivet     }
1645a2748737SPierre Jolivet     if (!same) {
1646a2748737SPierre Jolivet       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1647a2748737SPierre Jolivet       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1648a2748737SPierre Jolivet       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1649a2748737SPierre Jolivet       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1650a2748737SPierre Jolivet       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1651a2748737SPierre Jolivet     }
1652a2748737SPierre Jolivet     if (cend - cbegin != a->cmat->cmap->N) {
16539566063dSJacob Faibussowitsch       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
16549566063dSJacob Faibussowitsch       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
16559566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
16569566063dSJacob Faibussowitsch       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
16575ea7661aSPierre Jolivet     }
1658a2748737SPierre Jolivet   }
1659616b8fbbSStefano Zampini   c = (Mat_MPIDense *)a->cmat->data;
16605f80ce2aSJacob Faibussowitsch   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1661a2748737SPierre Jolivet   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
16623faff063SStefano Zampini 
1663616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1664616b8fbbSStefano Zampini   a->cmat->assembled    = PETSC_TRUE;
16653faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
16663faff063SStefano Zampini   a->cmat->offloadmask = c->A->offloadmask;
16673faff063SStefano Zampini #endif
16685ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
16695ea7661aSPierre Jolivet   *v          = a->cmat;
16703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16715ea7661aSPierre Jolivet }
16725ea7661aSPierre Jolivet 
167366976f2fSJacob Faibussowitsch static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1674d71ae5a4SJacob Faibussowitsch {
16755ea7661aSPierre Jolivet   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1676616b8fbbSStefano Zampini   Mat_MPIDense *c;
16775ea7661aSPierre Jolivet 
16785ea7661aSPierre Jolivet   PetscFunctionBegin;
16795f80ce2aSJacob Faibussowitsch   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
16805f80ce2aSJacob Faibussowitsch   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
16815f80ce2aSJacob Faibussowitsch   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
16825ea7661aSPierre Jolivet   a->matinuse = 0;
1683616b8fbbSStefano Zampini   c           = (Mat_MPIDense *)a->cmat->data;
16849566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1685742765d3SMatthew Knepley   if (v) *v = NULL;
16863faff063SStefano Zampini #if defined(PETSC_HAVE_DEVICE)
16873faff063SStefano Zampini   A->offloadmask = a->A->offloadmask;
16883faff063SStefano Zampini #endif
16893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16905ea7661aSPierre Jolivet }
16915ea7661aSPierre Jolivet 
1692002bc6daSRichard Tran Mills /*MC
1693002bc6daSRichard Tran Mills    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1694002bc6daSRichard Tran Mills 
16952ef1f0ffSBarry Smith    Options Database Key:
169611a5261eSBarry Smith . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1697002bc6daSRichard Tran Mills 
1698002bc6daSRichard Tran Mills   Level: beginner
1699002bc6daSRichard Tran Mills 
17001cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1701002bc6daSRichard Tran Mills M*/
17024742e46bSJacob Faibussowitsch PetscErrorCode MatCreate_MPIDense(Mat mat)
1703d71ae5a4SJacob Faibussowitsch {
1704273d9f13SBarry Smith   Mat_MPIDense *a;
1705273d9f13SBarry Smith 
1706273d9f13SBarry Smith   PetscFunctionBegin;
17074dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&a));
1708b0a32e0cSBarry Smith   mat->data   = (void *)a;
1709aea10558SJacob Faibussowitsch   mat->ops[0] = MatOps_Values;
1710273d9f13SBarry Smith 
1711273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1712273d9f13SBarry Smith 
1713273d9f13SBarry Smith   /* build cache for off array entries formed */
1714273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
17152205254eSKarl Rupp 
17169566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1717273d9f13SBarry Smith 
1718273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1719f4259b30SLisandro Dalcin   a->lvec        = NULL;
1720f4259b30SLisandro Dalcin   a->Mvctx       = NULL;
1721273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1722273d9f13SBarry Smith 
17239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
17249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
17259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
17269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
17279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
17289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
17299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
17309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
17319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
17329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
17339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
17349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
17359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
17369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
17379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
17389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
17399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
17409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
17419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
17429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
17439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
17448baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
17459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
17468baccfbdSHong Zhang #endif
1747d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
17489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1749d24d4204SJose E. Roman #endif
1750637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
17519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1752637a0070SStefano Zampini #endif
17539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
17549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
17559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
17566718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
17579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
17589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
17596718818eSStefano Zampini #endif
176047d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
176147d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
176247d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
176347d993e7Ssuyashtn   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
176447d993e7Ssuyashtn #endif
17659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
17669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1767*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1768*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1769*0be0d8bdSHansol Suh   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
17709566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
17713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1772273d9f13SBarry Smith }
1773273d9f13SBarry Smith 
1774209238afSKris Buschelman /*MC
1775002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1776209238afSKris Buschelman 
177711a5261eSBarry Smith    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
177811a5261eSBarry Smith    and `MATMPIDENSE` otherwise.
1779209238afSKris Buschelman 
17802ef1f0ffSBarry Smith    Options Database Key:
178111a5261eSBarry Smith . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1782209238afSKris Buschelman 
1783209238afSKris Buschelman   Level: beginner
1784209238afSKris Buschelman 
17851cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
17866947451fSStefano Zampini M*/
17876947451fSStefano Zampini 
1788273d9f13SBarry Smith /*@C
1789273d9f13SBarry Smith   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1790273d9f13SBarry Smith 
17918f490ea3SStefano Zampini   Collective
1792273d9f13SBarry Smith 
1793273d9f13SBarry Smith   Input Parameters:
1794fe59aa6dSJacob Faibussowitsch + B    - the matrix
17952ef1f0ffSBarry Smith - data - optional location of matrix data.  Set to `NULL` for PETSc
1796273d9f13SBarry Smith    to control all matrix memory allocation.
1797273d9f13SBarry Smith 
17982ef1f0ffSBarry Smith   Level: intermediate
17992ef1f0ffSBarry Smith 
1800273d9f13SBarry Smith   Notes:
18012ef1f0ffSBarry Smith   The dense format is fully compatible with standard Fortran
1802273d9f13SBarry Smith   storage by columns.
1803273d9f13SBarry Smith 
1804273d9f13SBarry Smith   The data input variable is intended primarily for Fortran programmers
1805273d9f13SBarry Smith   who wish to allocate their own matrix memory space.  Most users should
18062ef1f0ffSBarry Smith   set `data` to `NULL`.
1807273d9f13SBarry Smith 
18081cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1809273d9f13SBarry Smith @*/
1810d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1811d71ae5a4SJacob Faibussowitsch {
1812273d9f13SBarry Smith   PetscFunctionBegin;
1813d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1814cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
18153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1816273d9f13SBarry Smith }
1817273d9f13SBarry Smith 
1818d3042a70SBarry Smith /*@
181911a5261eSBarry Smith   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1820d3042a70SBarry Smith   array provided by the user. This is useful to avoid copying an array
1821d3042a70SBarry Smith   into a matrix
1822d3042a70SBarry Smith 
1823d3042a70SBarry Smith   Not Collective
1824d3042a70SBarry Smith 
1825d3042a70SBarry Smith   Input Parameters:
1826d3042a70SBarry Smith + mat   - the matrix
1827d3042a70SBarry Smith - array - the array in column major order
1828d3042a70SBarry Smith 
18292ef1f0ffSBarry Smith   Level: developer
18302ef1f0ffSBarry Smith 
183111a5261eSBarry Smith   Note:
183211a5261eSBarry Smith   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1833d3042a70SBarry Smith   freed when the matrix is destroyed.
1834d3042a70SBarry Smith 
18351cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
183611a5261eSBarry Smith           `MatDenseReplaceArray()`
1837d3042a70SBarry Smith @*/
1838d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1839d71ae5a4SJacob Faibussowitsch {
1840d3042a70SBarry Smith   PetscFunctionBegin;
1841d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1842cac4c232SBarry Smith   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
18439566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
184447d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1845637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1846637a0070SStefano Zampini #endif
18473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1848d3042a70SBarry Smith }
1849d3042a70SBarry Smith 
1850d3042a70SBarry Smith /*@
185111a5261eSBarry Smith   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1852d3042a70SBarry Smith 
1853d3042a70SBarry Smith   Not Collective
1854d3042a70SBarry Smith 
18552fe279fdSBarry Smith   Input Parameter:
1856d3042a70SBarry Smith . mat - the matrix
1857d3042a70SBarry Smith 
18582ef1f0ffSBarry Smith   Level: developer
18592ef1f0ffSBarry Smith 
186011a5261eSBarry Smith   Note:
186111a5261eSBarry Smith   You can only call this after a call to `MatDensePlaceArray()`
1862d3042a70SBarry Smith 
18631cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1864d3042a70SBarry Smith @*/
1865d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseResetArray(Mat mat)
1866d71ae5a4SJacob Faibussowitsch {
1867d3042a70SBarry Smith   PetscFunctionBegin;
1868d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1869cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
18709566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
18713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1872d3042a70SBarry Smith }
1873d3042a70SBarry Smith 
1874d5ea218eSStefano Zampini /*@
1875d5ea218eSStefano Zampini   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1876d5ea218eSStefano Zampini   array provided by the user. This is useful to avoid copying an array
1877d5ea218eSStefano Zampini   into a matrix
1878d5ea218eSStefano Zampini 
1879d5ea218eSStefano Zampini   Not Collective
1880d5ea218eSStefano Zampini 
1881d5ea218eSStefano Zampini   Input Parameters:
1882d5ea218eSStefano Zampini + mat   - the matrix
1883d5ea218eSStefano Zampini - array - the array in column major order
1884d5ea218eSStefano Zampini 
18852ef1f0ffSBarry Smith   Level: developer
18862ef1f0ffSBarry Smith 
188711a5261eSBarry Smith   Note:
188811a5261eSBarry Smith   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1889d5ea218eSStefano Zampini   freed by the user. It will be freed when the matrix is destroyed.
1890d5ea218eSStefano Zampini 
18911cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1892d5ea218eSStefano Zampini @*/
1893d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1894d71ae5a4SJacob Faibussowitsch {
1895d5ea218eSStefano Zampini   PetscFunctionBegin;
1896d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1897cac4c232SBarry Smith   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
18989566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
189947d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1900d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1901d5ea218eSStefano Zampini #endif
19023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1903d5ea218eSStefano Zampini }
1904d5ea218eSStefano Zampini 
1905637a0070SStefano Zampini /*@C
190611a5261eSBarry Smith   MatCreateDense - Creates a matrix in `MATDENSE` format.
19078965ea79SLois Curfman McInnes 
1908d083f849SBarry Smith   Collective
1909db81eaa0SLois Curfman McInnes 
19108965ea79SLois Curfman McInnes   Input Parameters:
1911db81eaa0SLois Curfman McInnes + comm - MPI communicator
19122ef1f0ffSBarry Smith . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
19132ef1f0ffSBarry Smith . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
19142ef1f0ffSBarry Smith . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
19152ef1f0ffSBarry Smith . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
19162ef1f0ffSBarry Smith - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1917dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
19188965ea79SLois Curfman McInnes 
19198965ea79SLois Curfman McInnes   Output Parameter:
1920477f1c0bSLois Curfman McInnes . A - the matrix
19218965ea79SLois Curfman McInnes 
19222ef1f0ffSBarry Smith   Level: intermediate
19232ef1f0ffSBarry Smith 
1924b259b22eSLois Curfman McInnes   Notes:
19252ef1f0ffSBarry Smith   The dense format is fully compatible with standard Fortran
192639ddd567SLois Curfman McInnes   storage by columns.
192711a5261eSBarry Smith 
192811a5261eSBarry Smith   Although local portions of the matrix are stored in column-major
1929002bc6daSRichard Tran Mills   order, the matrix is partitioned across MPI ranks by row.
19308965ea79SLois Curfman McInnes 
193118f449edSLois Curfman McInnes   The data input variable is intended primarily for Fortran programmers
193218f449edSLois Curfman McInnes   who wish to allocate their own matrix memory space.  Most users should
19332ef1f0ffSBarry Smith   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
193418f449edSLois Curfman McInnes 
19358965ea79SLois Curfman McInnes   The user MUST specify either the local or global matrix dimensions
19368965ea79SLois Curfman McInnes   (possibly both).
19378965ea79SLois Curfman McInnes 
19381cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
19398965ea79SLois Curfman McInnes @*/
1940d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1941d71ae5a4SJacob Faibussowitsch {
19423a40ed3dSBarry Smith   PetscFunctionBegin;
19439566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
19449566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
19456f08ad05SPierre Jolivet   PetscCall(MatSetType(*A, MATDENSE));
19469566063dSJacob Faibussowitsch   PetscCall(MatSeqDenseSetPreallocation(*A, data));
19476f08ad05SPierre Jolivet   PetscCall(MatMPIDenseSetPreallocation(*A, data));
19483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19498965ea79SLois Curfman McInnes }
19508965ea79SLois Curfman McInnes 
1951d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1952d71ae5a4SJacob Faibussowitsch {
19538965ea79SLois Curfman McInnes   Mat           mat;
19543501a2bdSLois Curfman McInnes   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
19558965ea79SLois Curfman McInnes 
19563a40ed3dSBarry Smith   PetscFunctionBegin;
1957f4259b30SLisandro Dalcin   *newmat = NULL;
19589566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
19599566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
19609566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1961834f8fabSBarry Smith   a = (Mat_MPIDense *)mat->data;
19625aa7edbeSHong Zhang 
1963d5f3da31SBarry Smith   mat->factortype   = A->factortype;
1964c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1965273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
19668965ea79SLois Curfman McInnes 
1967e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
19683782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
1969e04c1aa4SHong Zhang 
19709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
19719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
19728965ea79SLois Curfman McInnes 
19739566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
197401b82886SBarry Smith 
19758965ea79SLois Curfman McInnes   *newmat = mat;
19763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19778965ea79SLois Curfman McInnes }
19788965ea79SLois Curfman McInnes 
197966976f2fSJacob Faibussowitsch static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1980d71ae5a4SJacob Faibussowitsch {
198187d5ce66SSatish Balay   PetscBool isbinary;
198287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
198387d5ce66SSatish Balay   PetscBool ishdf5;
198487d5ce66SSatish Balay #endif
1985eb91f321SVaclav Hapla 
1986eb91f321SVaclav Hapla   PetscFunctionBegin;
1987eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1988eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1989eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
19909566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
19919566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
199287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
19939566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
199487d5ce66SSatish Balay #endif
1995eb91f321SVaclav Hapla   if (isbinary) {
19969566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1997eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
199887d5ce66SSatish Balay   } else if (ishdf5) {
19999566063dSJacob Faibussowitsch     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2000eb91f321SVaclav Hapla #endif
200198921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
20023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2003eb91f321SVaclav Hapla }
2004eb91f321SVaclav Hapla 
2005d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2006d71ae5a4SJacob Faibussowitsch {
20076e4ee0c6SHong Zhang   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
20086e4ee0c6SHong Zhang   Mat           a, b;
200990ace30eSBarry Smith 
20106e4ee0c6SHong Zhang   PetscFunctionBegin;
20116e4ee0c6SHong Zhang   a = matA->A;
20126e4ee0c6SHong Zhang   b = matB->A;
20132cf15c64SPierre Jolivet   PetscCall(MatEqual(a, b, flag));
20142cf15c64SPierre Jolivet   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
20153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20166e4ee0c6SHong Zhang }
201790ace30eSBarry Smith 
201866976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2019d71ae5a4SJacob Faibussowitsch {
20206718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2021baa3c1c6SHong Zhang 
2022baa3c1c6SHong Zhang   PetscFunctionBegin;
20239566063dSJacob Faibussowitsch   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
20249566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&atb->atb));
20259566063dSJacob Faibussowitsch   PetscCall(PetscFree(atb));
20263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2027baa3c1c6SHong Zhang }
2028baa3c1c6SHong Zhang 
202966976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2030d71ae5a4SJacob Faibussowitsch {
20316718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2032cc48ffa7SToby Isaac 
2033cc48ffa7SToby Isaac   PetscFunctionBegin;
20349566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
20359566063dSJacob Faibussowitsch   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
20369566063dSJacob Faibussowitsch   PetscCall(PetscFree(abt));
20373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2038cc48ffa7SToby Isaac }
2039cc48ffa7SToby Isaac 
2040d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2041d71ae5a4SJacob Faibussowitsch {
2042baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
20436718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2044cb20be35SHong Zhang   MPI_Comm               comm;
20456718818eSStefano Zampini   PetscMPIInt            size, *recvcounts;
20466718818eSStefano Zampini   PetscScalar           *carray, *sendbuf;
2047637a0070SStefano Zampini   const PetscScalar     *atbarray;
2048a1f9a5e6SStefano Zampini   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2049e68c0b26SHong Zhang   const PetscInt        *ranges;
2050cb20be35SHong Zhang 
2051cb20be35SHong Zhang   PetscFunctionBegin;
20526718818eSStefano Zampini   MatCheckProduct(C, 3);
20535f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
20546718818eSStefano Zampini   atb        = (Mat_TransMatMultDense *)C->product->data;
20556718818eSStefano Zampini   recvcounts = atb->recvcounts;
20566718818eSStefano Zampini   sendbuf    = atb->sendbuf;
20576718818eSStefano Zampini 
20589566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
20599566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2060e68c0b26SHong Zhang 
2061c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
20629566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
2063cb20be35SHong Zhang 
20649566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
2065cb20be35SHong Zhang 
2066660d5466SHong Zhang   /* arrange atbarray into sendbuf */
20679566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
20689566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2069637a0070SStefano Zampini   for (proc = 0, k = 0; proc < size; proc++) {
2070baa3c1c6SHong Zhang     for (j = 0; j < cN; j++) {
2071a1f9a5e6SStefano Zampini       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2072cb20be35SHong Zhang     }
2073cb20be35SHong Zhang   }
20749566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2075637a0070SStefano Zampini 
2076c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
20779566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
20789566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
20799566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
208067af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
20819566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
20829566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
20833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2084cb20be35SHong Zhang }
2085cb20be35SHong Zhang 
2086d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2087d71ae5a4SJacob Faibussowitsch {
2088cb20be35SHong Zhang   MPI_Comm               comm;
2089baa3c1c6SHong Zhang   PetscMPIInt            size;
2090660d5466SHong Zhang   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2091baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
209247d993e7Ssuyashtn   PetscBool              cisdense = PETSC_FALSE;
20930acaeb71SStefano Zampini   PetscInt               i;
20940acaeb71SStefano Zampini   const PetscInt        *ranges;
2095cb20be35SHong Zhang 
2096cb20be35SHong Zhang   PetscFunctionBegin;
20978f83a4d9SJacob Faibussowitsch   MatCheckProduct(C, 4);
20985f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
20999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2100cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
210198921bdaSJacob Faibussowitsch     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2102cb20be35SHong Zhang   }
2103cb20be35SHong Zhang 
21044222ddf1SHong Zhang   /* create matrix product C */
21059566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
210647d993e7Ssuyashtn #if defined(PETSC_HAVE_CUDA)
21079566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
210847d993e7Ssuyashtn #endif
210947d993e7Ssuyashtn #if defined(PETSC_HAVE_HIP)
211047d993e7Ssuyashtn   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
211147d993e7Ssuyashtn #endif
211248a46eb9SPierre Jolivet   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
21139566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2114baa3c1c6SHong Zhang 
21154222ddf1SHong Zhang   /* create data structure for reuse C */
21169566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
21179566063dSJacob Faibussowitsch   PetscCall(PetscNew(&atb));
21184222ddf1SHong Zhang   cM = C->rmap->N;
21199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
21209566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(C, &ranges));
21210acaeb71SStefano Zampini   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2122baa3c1c6SHong Zhang 
21236718818eSStefano Zampini   C->product->data    = atb;
21246718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
21253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2126cb20be35SHong Zhang }
2127cb20be35SHong Zhang 
2128d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2129d71ae5a4SJacob Faibussowitsch {
2130cc48ffa7SToby Isaac   MPI_Comm               comm;
2131cc48ffa7SToby Isaac   PetscMPIInt            i, size;
2132cc48ffa7SToby Isaac   PetscInt               maxRows, bufsiz;
2133cc48ffa7SToby Isaac   PetscMPIInt            tag;
21344222ddf1SHong Zhang   PetscInt               alg;
2135cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
21364222ddf1SHong Zhang   Mat_Product           *product = C->product;
21374222ddf1SHong Zhang   PetscBool              flg;
2138cc48ffa7SToby Isaac 
2139cc48ffa7SToby Isaac   PetscFunctionBegin;
21406718818eSStefano Zampini   MatCheckProduct(C, 4);
21415f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
21424222ddf1SHong Zhang   /* check local size of A and B */
21435f80ce2aSJacob Faibussowitsch   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2144cc48ffa7SToby Isaac 
21459566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2146637a0070SStefano Zampini   alg = flg ? 0 : 1;
2147cc48ffa7SToby Isaac 
21484222ddf1SHong Zhang   /* setup matrix product C */
21499566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
21509566063dSJacob Faibussowitsch   PetscCall(MatSetType(C, MATMPIDENSE));
21519566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
21529566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
21534222ddf1SHong Zhang 
21544222ddf1SHong Zhang   /* create data structure for reuse C */
21559566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
21569566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
21579566063dSJacob Faibussowitsch   PetscCall(PetscNew(&abt));
2158cc48ffa7SToby Isaac   abt->tag = tag;
2159faa55883SToby Isaac   abt->alg = alg;
2160faa55883SToby Isaac   switch (alg) {
21614222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2162cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2163cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
2164f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2165faa55883SToby Isaac     break;
21664222ddf1SHong Zhang   default: /* alg: "allgatherv" */
2167f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2168f4f49eeaSPierre Jolivet     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2169faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2170faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2171faa55883SToby Isaac     break;
2172faa55883SToby Isaac   }
2173cc48ffa7SToby Isaac 
21746718818eSStefano Zampini   C->product->data                = abt;
21756718818eSStefano Zampini   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
21764222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
21773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2178cc48ffa7SToby Isaac }
2179cc48ffa7SToby Isaac 
2180d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2181d71ae5a4SJacob Faibussowitsch {
2182cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
21836718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2184cc48ffa7SToby Isaac   MPI_Comm               comm;
2185cc48ffa7SToby Isaac   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2186f4259b30SLisandro Dalcin   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2187cc48ffa7SToby Isaac   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2188cc48ffa7SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2189637a0070SStefano Zampini   const PetscScalar     *av, *bv;
219023fff9afSBarry Smith   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2191cc48ffa7SToby Isaac   MPI_Request            reqs[2];
2192cc48ffa7SToby Isaac   const PetscInt        *ranges;
2193cc48ffa7SToby Isaac 
2194cc48ffa7SToby Isaac   PetscFunctionBegin;
21956718818eSStefano Zampini   MatCheckProduct(C, 3);
21965f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
21976718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
21989566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
21999566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
22009566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
22019566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
22029566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
22039566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
22049566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
22059566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
22069566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &i));
22079566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &blda));
22089566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
22099566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
22109566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRanges(B, &ranges));
2211cc48ffa7SToby Isaac   bn = B->rmap->n;
2212637a0070SStefano Zampini   if (blda == bn) {
2213637a0070SStefano Zampini     sendbuf = (PetscScalar *)bv;
2214cc48ffa7SToby Isaac   } else {
2215cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2216cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2217ad540459SPierre Jolivet       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2218cc48ffa7SToby Isaac     }
2219cc48ffa7SToby Isaac   }
2220cc48ffa7SToby Isaac   if (size > 1) {
2221cc48ffa7SToby Isaac     sendto   = (rank + size - 1) % size;
2222cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2223cc48ffa7SToby Isaac   } else {
2224cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2225cc48ffa7SToby Isaac   }
22269566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
22279566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2228cc48ffa7SToby Isaac   recvisfrom = rank;
2229cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2230cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2231cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2232cc48ffa7SToby Isaac     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2233cc48ffa7SToby Isaac 
2234cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2235cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2236cc48ffa7SToby Isaac       sendsiz = cK * bn;
2237cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2238cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
22399566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
22409566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2241cc48ffa7SToby Isaac     }
2242cc48ffa7SToby Isaac 
2243cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
22449566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2245792fecdfSBarry Smith     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2246cc48ffa7SToby Isaac 
2247cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2248cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
22499566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2250cc48ffa7SToby Isaac     }
2251cc48ffa7SToby Isaac     bn         = nextbn;
2252cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2253cc48ffa7SToby Isaac     sendbuf    = recvbuf;
2254cc48ffa7SToby Isaac   }
22559566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
22569566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
22579566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
225867af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
22599566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
22609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
22613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2262cc48ffa7SToby Isaac }
2263cc48ffa7SToby Isaac 
2264d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2265d71ae5a4SJacob Faibussowitsch {
2266faa55883SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
22676718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2268faa55883SToby Isaac   MPI_Comm               comm;
2269637a0070SStefano Zampini   PetscMPIInt            size;
2270637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2271637a0070SStefano Zampini   const PetscScalar     *av, *bv;
2272637a0070SStefano Zampini   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2273faa55883SToby Isaac   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2274637a0070SStefano Zampini   PetscBLASInt           cm, cn, ck, alda, clda;
2275faa55883SToby Isaac 
2276faa55883SToby Isaac   PetscFunctionBegin;
22776718818eSStefano Zampini   MatCheckProduct(C, 3);
22785f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
22796718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
22809566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
22819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
22829566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(a->A, &av));
22839566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(b->A, &bv));
22849566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
22859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(a->A, &i));
22869566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &alda));
22879566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(b->A, &blda));
22889566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(c->A, &i));
22899566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(i, &clda));
2290faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2291faa55883SToby Isaac   bn      = B->rmap->n;
2292faa55883SToby Isaac   sendbuf = abt->buf[0];
2293faa55883SToby Isaac   recvbuf = abt->buf[1];
2294faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2295ad540459SPierre Jolivet     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2296faa55883SToby Isaac   }
22979566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
22989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
22999566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(cK, &ck));
23009566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
23019566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2302792fecdfSBarry Smith   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
23039566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
23049566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
23059566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
230667af85e8SPierre Jolivet   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
23079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
23089566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
23093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2310faa55883SToby Isaac }
2311faa55883SToby Isaac 
2312d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2313d71ae5a4SJacob Faibussowitsch {
23146718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2315faa55883SToby Isaac 
2316faa55883SToby Isaac   PetscFunctionBegin;
23176718818eSStefano Zampini   MatCheckProduct(C, 3);
23185f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
23196718818eSStefano Zampini   abt = (Mat_MatTransMultDense *)C->product->data;
2320faa55883SToby Isaac   switch (abt->alg) {
2321d71ae5a4SJacob Faibussowitsch   case 1:
2322d71ae5a4SJacob Faibussowitsch     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2323d71ae5a4SJacob Faibussowitsch     break;
2324d71ae5a4SJacob Faibussowitsch   default:
2325d71ae5a4SJacob Faibussowitsch     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2326d71ae5a4SJacob Faibussowitsch     break;
2327faa55883SToby Isaac   }
23283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2329faa55883SToby Isaac }
2330faa55883SToby Isaac 
233166976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2332d71ae5a4SJacob Faibussowitsch {
23336718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2334320f2790SHong Zhang 
2335320f2790SHong Zhang   PetscFunctionBegin;
23369566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ce));
23379566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Ae));
23389566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&ab->Be));
23399566063dSJacob Faibussowitsch   PetscCall(PetscFree(ab));
23403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2341320f2790SHong Zhang }
2342320f2790SHong Zhang 
23435d8c7819SPierre Jolivet static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2344d71ae5a4SJacob Faibussowitsch {
23456718818eSStefano Zampini   Mat_MatMultDense *ab;
23465d8c7819SPierre Jolivet   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2347320f2790SHong Zhang 
2348320f2790SHong Zhang   PetscFunctionBegin;
23496718818eSStefano Zampini   MatCheckProduct(C, 3);
23505f80ce2aSJacob Faibussowitsch   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
23516718818eSStefano Zampini   ab = (Mat_MatMultDense *)C->product->data;
23525d8c7819SPierre Jolivet   if (ab->Ae && ab->Ce) {
23535d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL)
23549566063dSJacob Faibussowitsch     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
23559566063dSJacob Faibussowitsch     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
23569566063dSJacob Faibussowitsch     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
23579566063dSJacob Faibussowitsch     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
23585d8c7819SPierre Jolivet #else
23595d8c7819SPierre Jolivet     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
23605d8c7819SPierre Jolivet #endif
23615d8c7819SPierre Jolivet   } else {
23625d8c7819SPierre Jolivet     const PetscScalar *read;
23635d8c7819SPierre Jolivet     PetscScalar       *write;
23645d8c7819SPierre Jolivet     PetscInt           lda;
23655d8c7819SPierre Jolivet 
23665d8c7819SPierre Jolivet     PetscCall(MatDenseGetLDA(B, &lda));
23675d8c7819SPierre Jolivet     PetscCall(MatDenseGetArrayRead(B, &read));
23685d8c7819SPierre Jolivet     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
23695d8c7819SPierre Jolivet     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
23705d8c7819SPierre Jolivet     for (PetscInt i = 0; i < C->cmap->N; ++i) {
23715d8c7819SPierre Jolivet       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
23725d8c7819SPierre Jolivet       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
23735d8c7819SPierre Jolivet     }
23745d8c7819SPierre Jolivet     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
23755d8c7819SPierre Jolivet     PetscCall(MatDenseRestoreArrayRead(B, &read));
2376f4f49eeaSPierre Jolivet     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
23775d8c7819SPierre Jolivet   }
23783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2379320f2790SHong Zhang }
2380320f2790SHong Zhang 
2381d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2382d71ae5a4SJacob Faibussowitsch {
23835d8c7819SPierre Jolivet   Mat_Product      *product = C->product;
23845d8c7819SPierre Jolivet   PetscInt          alg;
2385320f2790SHong Zhang   Mat_MatMultDense *ab;
23865d8c7819SPierre Jolivet   PetscBool         flg;
2387320f2790SHong Zhang 
2388320f2790SHong Zhang   PetscFunctionBegin;
23896718818eSStefano Zampini   MatCheckProduct(C, 4);
23905f80ce2aSJacob Faibussowitsch   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
23914222ddf1SHong Zhang   /* check local size of A and B */
23925d8c7819SPierre Jolivet   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
23935d8c7819SPierre Jolivet              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2394320f2790SHong Zhang 
23955d8c7819SPierre Jolivet   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
23965d8c7819SPierre Jolivet   alg = flg ? 0 : 1;
23974222ddf1SHong Zhang 
23984222ddf1SHong Zhang   /* setup C */
23995d8c7819SPierre Jolivet   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
24005d8c7819SPierre Jolivet   PetscCall(MatSetType(C, MATMPIDENSE));
24019566063dSJacob Faibussowitsch   PetscCall(MatSetUp(C));
2402320f2790SHong Zhang 
2403320f2790SHong Zhang   /* create data structure for reuse Cdense */
24049566063dSJacob Faibussowitsch   PetscCall(PetscNew(&ab));
24055d8c7819SPierre Jolivet 
24065d8c7819SPierre Jolivet   switch (alg) {
24075d8c7819SPierre Jolivet   case 1: /* alg: "elemental" */
24085d8c7819SPierre Jolivet #if PetscDefined(HAVE_ELEMENTAL)
24095d8c7819SPierre Jolivet     /* create elemental matrices Ae and Be */
24105d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
24115d8c7819SPierre Jolivet     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
24125d8c7819SPierre Jolivet     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
24135d8c7819SPierre Jolivet     PetscCall(MatSetUp(ab->Ae));
24145d8c7819SPierre Jolivet     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
24155d8c7819SPierre Jolivet 
24165d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
24175d8c7819SPierre Jolivet     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
24185d8c7819SPierre Jolivet     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
24195d8c7819SPierre Jolivet     PetscCall(MatSetUp(ab->Be));
24205d8c7819SPierre Jolivet     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
24215d8c7819SPierre Jolivet 
24225d8c7819SPierre Jolivet     /* compute symbolic Ce = Ae*Be */
24235d8c7819SPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
24245d8c7819SPierre Jolivet     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
24255d8c7819SPierre Jolivet #else
24265d8c7819SPierre Jolivet     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
24275d8c7819SPierre Jolivet #endif
24285d8c7819SPierre Jolivet     break;
24295d8c7819SPierre Jolivet   default: /* alg: "petsc" */
24305d8c7819SPierre Jolivet     ab->Ae = NULL;
24315d8c7819SPierre Jolivet     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
24325d8c7819SPierre Jolivet     ab->Ce = NULL;
24335d8c7819SPierre Jolivet     break;
24345d8c7819SPierre Jolivet   }
24356718818eSStefano Zampini 
24366718818eSStefano Zampini   C->product->data       = ab;
24376718818eSStefano Zampini   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
24384222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
24393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2440320f2790SHong Zhang }
24412ef1f0ffSBarry Smith 
2442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2443d71ae5a4SJacob Faibussowitsch {
24445d8c7819SPierre Jolivet   Mat_Product *product     = C->product;
24455d8c7819SPierre Jolivet   const char  *algTypes[2] = {"petsc", "elemental"};
24465d8c7819SPierre Jolivet   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
24475d8c7819SPierre Jolivet   PetscBool    flg = PETSC_FALSE;
24485d8c7819SPierre Jolivet 
24494d86920dSPierre Jolivet   PetscFunctionBegin;
24505d8c7819SPierre Jolivet   /* Set default algorithm */
24515d8c7819SPierre Jolivet   alg = 0; /* default is petsc */
24525d8c7819SPierre Jolivet   PetscCall(PetscStrcmp(product->alg, "default", &flg));
24535d8c7819SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24545d8c7819SPierre Jolivet 
24555d8c7819SPierre Jolivet   /* Get runtime option */
24565d8c7819SPierre Jolivet   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
24575d8c7819SPierre Jolivet   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
24585d8c7819SPierre Jolivet   PetscOptionsEnd();
24595d8c7819SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24605d8c7819SPierre Jolivet 
24614222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
24624222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
24633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2464320f2790SHong Zhang }
246586aefd0dSHong Zhang 
2466d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2467d71ae5a4SJacob Faibussowitsch {
24684222ddf1SHong Zhang   Mat_Product *product = C->product;
24694222ddf1SHong Zhang   Mat          A = product->A, B = product->B;
24704222ddf1SHong Zhang 
24714222ddf1SHong Zhang   PetscFunctionBegin;
247207e90273SPierre Jolivet   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
247307e90273SPierre Jolivet              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
24746718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
24756718818eSStefano Zampini   C->ops->productsymbolic          = MatProductSymbolic_AtB;
24763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24774222ddf1SHong Zhang }
24784222ddf1SHong Zhang 
2479d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2480d71ae5a4SJacob Faibussowitsch {
24814222ddf1SHong Zhang   Mat_Product *product     = C->product;
24824222ddf1SHong Zhang   const char  *algTypes[2] = {"allgatherv", "cyclic"};
24834222ddf1SHong Zhang   PetscInt     alg, nalg = 2;
24844222ddf1SHong Zhang   PetscBool    flg = PETSC_FALSE;
24854222ddf1SHong Zhang 
24864222ddf1SHong Zhang   PetscFunctionBegin;
24874222ddf1SHong Zhang   /* Set default algorithm */
24884222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
24899566063dSJacob Faibussowitsch   PetscCall(PetscStrcmp(product->alg, "default", &flg));
249048a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
24914222ddf1SHong Zhang 
24924222ddf1SHong Zhang   /* Get runtime option */
24934222ddf1SHong Zhang   if (product->api_user) {
2494d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
24959566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2496d0609cedSBarry Smith     PetscOptionsEnd();
24974222ddf1SHong Zhang   } else {
2498d0609cedSBarry Smith     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
24999566063dSJacob Faibussowitsch     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2500d0609cedSBarry Smith     PetscOptionsEnd();
25014222ddf1SHong Zhang   }
250248a46eb9SPierre Jolivet   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
25034222ddf1SHong Zhang 
25044222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
25054222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
25063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25074222ddf1SHong Zhang }
25084222ddf1SHong Zhang 
25095d8c7819SPierre Jolivet static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2510d71ae5a4SJacob Faibussowitsch {
25114222ddf1SHong Zhang   Mat_Product *product = C->product;
25124222ddf1SHong Zhang 
25134222ddf1SHong Zhang   PetscFunctionBegin;
25144222ddf1SHong Zhang   switch (product->type) {
2515d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
2516d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2517d71ae5a4SJacob Faibussowitsch     break;
2518d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
2519d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2520d71ae5a4SJacob Faibussowitsch     break;
2521d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
2522d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2523d71ae5a4SJacob Faibussowitsch     break;
2524d71ae5a4SJacob Faibussowitsch   default:
2525d71ae5a4SJacob Faibussowitsch     break;
25264222ddf1SHong Zhang   }
25273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25284222ddf1SHong Zhang }
2529