xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 616b8fbbf6efde6bd7a91eb2845e10221952da25)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
8baa3c1c6SHong Zhang #include <petscblaslapack.h>
98965ea79SLois Curfman McInnes 
10ab92ecdeSBarry Smith /*@
11ab92ecdeSBarry Smith 
12ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
14ab92ecdeSBarry Smith 
15ab92ecdeSBarry Smith     Input Parameter:
16ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
17ab92ecdeSBarry Smith 
18ab92ecdeSBarry Smith     Output Parameter:
19ab92ecdeSBarry Smith .      B - the inner matrix
20ab92ecdeSBarry Smith 
218e6c10adSSatish Balay     Level: intermediate
228e6c10adSSatish Balay 
23ab92ecdeSBarry Smith @*/
24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25ab92ecdeSBarry Smith {
26ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27ab92ecdeSBarry Smith   PetscErrorCode ierr;
28ace3abfcSBarry Smith   PetscBool      flg;
29ab92ecdeSBarry Smith 
30ab92ecdeSBarry Smith   PetscFunctionBegin;
31d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32d5ea218eSStefano Zampini   PetscValidPointer(B,2);
33251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
342205254eSKarl Rupp   if (flg) *B = mat->A;
352205254eSKarl Rupp   else *B = A;
36ab92ecdeSBarry Smith   PetscFunctionReturn(0);
37ab92ecdeSBarry Smith }
38ab92ecdeSBarry Smith 
39ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40ba8c8a56SBarry Smith {
41ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42ba8c8a56SBarry Smith   PetscErrorCode ierr;
43d0f46423SBarry Smith   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44ba8c8a56SBarry Smith 
45ba8c8a56SBarry Smith   PetscFunctionBegin;
46e7e72b3dSBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47ba8c8a56SBarry Smith   lrow = row - rstart;
48ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49ba8c8a56SBarry Smith   PetscFunctionReturn(0);
50ba8c8a56SBarry Smith }
51ba8c8a56SBarry Smith 
52637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53ba8c8a56SBarry Smith {
54637a0070SStefano Zampini   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55ba8c8a56SBarry Smith   PetscErrorCode ierr;
56637a0070SStefano Zampini   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57ba8c8a56SBarry Smith 
58ba8c8a56SBarry Smith   PetscFunctionBegin;
59637a0070SStefano Zampini   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60637a0070SStefano Zampini   lrow = row - rstart;
61637a0070SStefano Zampini   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62ba8c8a56SBarry Smith   PetscFunctionReturn(0);
63ba8c8a56SBarry Smith }
64ba8c8a56SBarry Smith 
6511bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
660de54da6SSatish Balay {
670de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
686849ba73SBarry Smith   PetscErrorCode ierr;
69d0f46423SBarry Smith   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
7087828ca2SBarry Smith   PetscScalar    *array;
710de54da6SSatish Balay   MPI_Comm       comm;
724b6ae80fSStefano Zampini   PetscBool      flg;
7311bd1e4dSLisandro Dalcin   Mat            B;
740de54da6SSatish Balay 
750de54da6SSatish Balay   PetscFunctionBegin;
764b6ae80fSStefano Zampini   ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
774b6ae80fSStefano Zampini   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
7811bd1e4dSLisandro Dalcin   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
79*616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
804b6ae80fSStefano Zampini 
814b6ae80fSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
824b6ae80fSStefano Zampini     if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
830de54da6SSatish Balay     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
8411bd1e4dSLisandro Dalcin     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
8511bd1e4dSLisandro Dalcin     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
8611bd1e4dSLisandro Dalcin     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
874b6ae80fSStefano Zampini     ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
8811bd1e4dSLisandro Dalcin     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
894b6ae80fSStefano Zampini     ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
9011bd1e4dSLisandro Dalcin     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9111bd1e4dSLisandro Dalcin     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9211bd1e4dSLisandro Dalcin     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
9311bd1e4dSLisandro Dalcin     *a   = B;
9411bd1e4dSLisandro Dalcin     ierr = MatDestroy(&B);CHKERRQ(ierr);
952205254eSKarl Rupp   } else *a = B;
960de54da6SSatish Balay   PetscFunctionReturn(0);
970de54da6SSatish Balay }
980de54da6SSatish Balay 
9913f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
1008965ea79SLois Curfman McInnes {
10139b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
102dfbe8321SBarry Smith   PetscErrorCode ierr;
103d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
104ace3abfcSBarry Smith   PetscBool      roworiented = A->roworiented;
1058965ea79SLois Curfman McInnes 
1063a40ed3dSBarry Smith   PetscFunctionBegin;
1078965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1085ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
109e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1108965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1118965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
11239b7565bSBarry Smith       if (roworiented) {
11339b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1143a40ed3dSBarry Smith       } else {
1158965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1165ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
117e32f2f54SBarry Smith           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
11839b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
11939b7565bSBarry Smith         }
1208965ea79SLois Curfman McInnes       }
1212205254eSKarl Rupp     } else if (!A->donotstash) {
1225080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
12339b7565bSBarry Smith       if (roworiented) {
124b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
125d36fbae8SSatish Balay       } else {
126b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
12739b7565bSBarry Smith       }
128b49de8d1SLois Curfman McInnes     }
129b49de8d1SLois Curfman McInnes   }
1303a40ed3dSBarry Smith   PetscFunctionReturn(0);
131b49de8d1SLois Curfman McInnes }
132b49de8d1SLois Curfman McInnes 
13313f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134b49de8d1SLois Curfman McInnes {
135b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136dfbe8321SBarry Smith   PetscErrorCode ierr;
137d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138b49de8d1SLois Curfman McInnes 
1393a40ed3dSBarry Smith   PetscFunctionBegin;
140b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
141e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
144b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
145b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
146e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147e7e72b3dSBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149b49de8d1SLois Curfman McInnes       }
150e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
1518965ea79SLois Curfman McInnes   }
1523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1538965ea79SLois Curfman McInnes }
1548965ea79SLois Curfman McInnes 
15549a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
15649a6ff4bSBarry Smith {
15749a6ff4bSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
15849a6ff4bSBarry Smith   PetscErrorCode ierr;
15949a6ff4bSBarry Smith 
16049a6ff4bSBarry Smith   PetscFunctionBegin;
16149a6ff4bSBarry Smith   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
16249a6ff4bSBarry Smith   PetscFunctionReturn(0);
16349a6ff4bSBarry Smith }
16449a6ff4bSBarry Smith 
165ad16ce7aSStefano Zampini static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166ad16ce7aSStefano Zampini {
167ad16ce7aSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168ad16ce7aSStefano Zampini   PetscErrorCode ierr;
169ad16ce7aSStefano Zampini 
170ad16ce7aSStefano Zampini   PetscFunctionBegin;
171ad16ce7aSStefano Zampini   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172ad16ce7aSStefano Zampini   PetscFunctionReturn(0);
173ad16ce7aSStefano Zampini }
174ad16ce7aSStefano Zampini 
175637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176ff14e315SSatish Balay {
177ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178dfbe8321SBarry Smith   PetscErrorCode ierr;
179ff14e315SSatish Balay 
1803a40ed3dSBarry Smith   PetscFunctionBegin;
181*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1828c778c55SBarry Smith   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
1833a40ed3dSBarry Smith   PetscFunctionReturn(0);
184ff14e315SSatish Balay }
185ff14e315SSatish Balay 
186637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
1878572280aSBarry Smith {
1888572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1898572280aSBarry Smith   PetscErrorCode ierr;
1908572280aSBarry Smith 
1918572280aSBarry Smith   PetscFunctionBegin;
192*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1938572280aSBarry Smith   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
1948572280aSBarry Smith   PetscFunctionReturn(0);
1958572280aSBarry Smith }
1968572280aSBarry Smith 
1976947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
1986947451fSStefano Zampini {
1996947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
2006947451fSStefano Zampini   PetscErrorCode ierr;
2016947451fSStefano Zampini 
2026947451fSStefano Zampini   PetscFunctionBegin;
203*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
2046947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
2056947451fSStefano Zampini   PetscFunctionReturn(0);
2066947451fSStefano Zampini }
2076947451fSStefano Zampini 
208637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
209d3042a70SBarry Smith {
210d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211d3042a70SBarry Smith   PetscErrorCode ierr;
212d3042a70SBarry Smith 
213d3042a70SBarry Smith   PetscFunctionBegin;
214*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
215*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
216d3042a70SBarry Smith   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
217d3042a70SBarry Smith   PetscFunctionReturn(0);
218d3042a70SBarry Smith }
219d3042a70SBarry Smith 
220d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
221d3042a70SBarry Smith {
222d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
223d3042a70SBarry Smith   PetscErrorCode ierr;
224d3042a70SBarry Smith 
225d3042a70SBarry Smith   PetscFunctionBegin;
226*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
227*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
228d3042a70SBarry Smith   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
229d3042a70SBarry Smith   PetscFunctionReturn(0);
230d3042a70SBarry Smith }
231d3042a70SBarry Smith 
232d5ea218eSStefano Zampini static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
233d5ea218eSStefano Zampini {
234d5ea218eSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
235d5ea218eSStefano Zampini   PetscErrorCode ierr;
236d5ea218eSStefano Zampini 
237d5ea218eSStefano Zampini   PetscFunctionBegin;
238*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240d5ea218eSStefano Zampini   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
241d5ea218eSStefano Zampini   PetscFunctionReturn(0);
242d5ea218eSStefano Zampini }
243d5ea218eSStefano Zampini 
2447dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
245ca3fa75bSLois Curfman McInnes {
246ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
2476849ba73SBarry Smith   PetscErrorCode    ierr;
248637a0070SStefano Zampini   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
2495d0c19d7SBarry Smith   const PetscInt    *irow,*icol;
250637a0070SStefano Zampini   const PetscScalar *v;
251637a0070SStefano Zampini   PetscScalar       *bv;
252ca3fa75bSLois Curfman McInnes   Mat               newmat;
2534aa3045dSJed Brown   IS                iscol_local;
25442a884f0SBarry Smith   MPI_Comm          comm_is,comm_mat;
255ca3fa75bSLois Curfman McInnes 
256ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
25742a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
25842a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
25942a884f0SBarry Smith   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
26042a884f0SBarry Smith 
2614aa3045dSJed Brown   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
262ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
2634aa3045dSJed Brown   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
264b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
265b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
2664aa3045dSJed Brown   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
267ca3fa75bSLois Curfman McInnes 
268ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2697eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2707eba5e9cSLois Curfman McInnes      original matrix! */
271ca3fa75bSLois Curfman McInnes 
272ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
2737eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
274ca3fa75bSLois Curfman McInnes 
275ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
276ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
277e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2787eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
279ca3fa75bSLois Curfman McInnes     newmat = *B;
280ca3fa75bSLois Curfman McInnes   } else {
281ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
282ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
2834aa3045dSJed Brown     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
2847adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2850298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
286ca3fa75bSLois Curfman McInnes   }
287ca3fa75bSLois Curfman McInnes 
288ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
289ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
290637a0070SStefano Zampini   ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
291637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
292637a0070SStefano Zampini   ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
2934aa3045dSJed Brown   for (i=0; i<Ncols; i++) {
294637a0070SStefano Zampini     const PetscScalar *av = v + lda*icol[i];
295ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2967eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
297ca3fa75bSLois Curfman McInnes     }
298ca3fa75bSLois Curfman McInnes   }
299637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
300637a0070SStefano Zampini   ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);
301ca3fa75bSLois Curfman McInnes 
302ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
303ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
304ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
305ca3fa75bSLois Curfman McInnes 
306ca3fa75bSLois Curfman McInnes   /* Free work space */
307ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
3085bdf786aSShri Abhyankar   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
30932bb1f2dSLisandro Dalcin   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
310ca3fa75bSLois Curfman McInnes   *B   = newmat;
311ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
312ca3fa75bSLois Curfman McInnes }
313ca3fa75bSLois Curfman McInnes 
314637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
315ff14e315SSatish Balay {
31673a71a0fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
31773a71a0fSBarry Smith   PetscErrorCode ierr;
31873a71a0fSBarry Smith 
3193a40ed3dSBarry Smith   PetscFunctionBegin;
3208c778c55SBarry Smith   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
3213a40ed3dSBarry Smith   PetscFunctionReturn(0);
322ff14e315SSatish Balay }
323ff14e315SSatish Balay 
324637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
3258572280aSBarry Smith {
3268572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
3278572280aSBarry Smith   PetscErrorCode ierr;
3288572280aSBarry Smith 
3298572280aSBarry Smith   PetscFunctionBegin;
3308572280aSBarry Smith   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
3318572280aSBarry Smith   PetscFunctionReturn(0);
3328572280aSBarry Smith }
3338572280aSBarry Smith 
3346947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
3356947451fSStefano Zampini {
3366947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
3376947451fSStefano Zampini   PetscErrorCode ierr;
3386947451fSStefano Zampini 
3396947451fSStefano Zampini   PetscFunctionBegin;
3406947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
3416947451fSStefano Zampini   PetscFunctionReturn(0);
3426947451fSStefano Zampini }
3436947451fSStefano Zampini 
344dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
3458965ea79SLois Curfman McInnes {
34639ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
347dfbe8321SBarry Smith   PetscErrorCode ierr;
34813f74950SBarry Smith   PetscInt       nstash,reallocs;
3498965ea79SLois Curfman McInnes 
3503a40ed3dSBarry Smith   PetscFunctionBegin;
351910cf402Sprj-   if (mdn->donotstash || mat->nooffprocentries) return(0);
3528965ea79SLois Curfman McInnes 
353d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
3548798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
355ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
3563a40ed3dSBarry Smith   PetscFunctionReturn(0);
3578965ea79SLois Curfman McInnes }
3588965ea79SLois Curfman McInnes 
359dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
3608965ea79SLois Curfman McInnes {
36139ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
3626849ba73SBarry Smith   PetscErrorCode ierr;
36313f74950SBarry Smith   PetscInt       i,*row,*col,flg,j,rstart,ncols;
36413f74950SBarry Smith   PetscMPIInt    n;
36587828ca2SBarry Smith   PetscScalar    *val;
3668965ea79SLois Curfman McInnes 
3673a40ed3dSBarry Smith   PetscFunctionBegin;
368910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
3698965ea79SLois Curfman McInnes     /*  wait on receives */
3707ef1d9bdSSatish Balay     while (1) {
3718798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
3727ef1d9bdSSatish Balay       if (!flg) break;
3738965ea79SLois Curfman McInnes 
3747ef1d9bdSSatish Balay       for (i=0; i<n;) {
3757ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
3762205254eSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
3772205254eSKarl Rupp           if (row[j] != rstart) break;
3782205254eSKarl Rupp         }
3797ef1d9bdSSatish Balay         if (j < n) ncols = j-i;
3807ef1d9bdSSatish Balay         else       ncols = n-i;
3817ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
3824b4eb8d3SJed Brown         ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
3837ef1d9bdSSatish Balay         i    = j;
3848965ea79SLois Curfman McInnes       }
3857ef1d9bdSSatish Balay     }
3868798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
387910cf402Sprj-   }
3888965ea79SLois Curfman McInnes 
38939ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
39039ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
3918965ea79SLois Curfman McInnes 
3926d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
39339ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
3948965ea79SLois Curfman McInnes   }
3953a40ed3dSBarry Smith   PetscFunctionReturn(0);
3968965ea79SLois Curfman McInnes }
3978965ea79SLois Curfman McInnes 
398dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
3998965ea79SLois Curfman McInnes {
400dfbe8321SBarry Smith   PetscErrorCode ierr;
40139ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
4023a40ed3dSBarry Smith 
4033a40ed3dSBarry Smith   PetscFunctionBegin;
4043a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
4053a40ed3dSBarry Smith   PetscFunctionReturn(0);
4068965ea79SLois Curfman McInnes }
4078965ea79SLois Curfman McInnes 
408637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
4098965ea79SLois Curfman McInnes {
41039ddd567SLois Curfman McInnes   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
4116849ba73SBarry Smith   PetscErrorCode    ierr;
412637a0070SStefano Zampini   PetscInt          i,len,*lrows;
413637a0070SStefano Zampini 
414637a0070SStefano Zampini   PetscFunctionBegin;
415637a0070SStefano Zampini   /* get locally owned rows */
416637a0070SStefano Zampini   ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
417637a0070SStefano Zampini   /* fix right hand side if needed */
418637a0070SStefano Zampini   if (x && b) {
41997b48c8fSBarry Smith     const PetscScalar *xx;
42097b48c8fSBarry Smith     PetscScalar       *bb;
4218965ea79SLois Curfman McInnes 
42297b48c8fSBarry Smith     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
423637a0070SStefano Zampini     ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
424637a0070SStefano Zampini     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
42597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
426637a0070SStefano Zampini     ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
42797b48c8fSBarry Smith   }
428637a0070SStefano Zampini   ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
429e2eb51b1SBarry Smith   if (diag != 0.0) {
430637a0070SStefano Zampini     Vec d;
431b9679d65SBarry Smith 
432637a0070SStefano Zampini     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
433637a0070SStefano Zampini     ierr = VecSet(d,diag);CHKERRQ(ierr);
434637a0070SStefano Zampini     ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
435637a0070SStefano Zampini     ierr = VecDestroy(&d);CHKERRQ(ierr);
436b9679d65SBarry Smith   }
437606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4383a40ed3dSBarry Smith   PetscFunctionReturn(0);
4398965ea79SLois Curfman McInnes }
4408965ea79SLois Curfman McInnes 
441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
442cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
443cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
444cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
445cc2e6a90SBarry Smith 
446dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4478965ea79SLois Curfman McInnes {
44839ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
449dfbe8321SBarry Smith   PetscErrorCode    ierr;
450637a0070SStefano Zampini   const PetscScalar *ax;
451637a0070SStefano Zampini   PetscScalar       *ay;
452c456f294SBarry Smith 
4533a40ed3dSBarry Smith   PetscFunctionBegin;
454637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
455637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
456637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
457637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
458637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
459637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
460637a0070SStefano Zampini   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4613a40ed3dSBarry Smith   PetscFunctionReturn(0);
4628965ea79SLois Curfman McInnes }
4638965ea79SLois Curfman McInnes 
464dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4658965ea79SLois Curfman McInnes {
46639ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
467dfbe8321SBarry Smith   PetscErrorCode    ierr;
468637a0070SStefano Zampini   const PetscScalar *ax;
469637a0070SStefano Zampini   PetscScalar       *ay;
470c456f294SBarry Smith 
4713a40ed3dSBarry Smith   PetscFunctionBegin;
472637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
473637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
474637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
475637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
476637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
477637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
478637a0070SStefano Zampini   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
4793a40ed3dSBarry Smith   PetscFunctionReturn(0);
4808965ea79SLois Curfman McInnes }
4818965ea79SLois Curfman McInnes 
482dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
483096963f5SLois Curfman McInnes {
484096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
485dfbe8321SBarry Smith   PetscErrorCode    ierr;
486637a0070SStefano Zampini   const PetscScalar *ax;
487637a0070SStefano Zampini   PetscScalar       *ay;
488096963f5SLois Curfman McInnes 
4893a40ed3dSBarry Smith   PetscFunctionBegin;
490637a0070SStefano Zampini   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
491637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
492637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
493637a0070SStefano Zampini   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
494637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
495637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
496637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
497637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
4983a40ed3dSBarry Smith   PetscFunctionReturn(0);
499096963f5SLois Curfman McInnes }
500096963f5SLois Curfman McInnes 
501dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
502096963f5SLois Curfman McInnes {
503096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
504dfbe8321SBarry Smith   PetscErrorCode    ierr;
505637a0070SStefano Zampini   const PetscScalar *ax;
506637a0070SStefano Zampini   PetscScalar       *ay;
507096963f5SLois Curfman McInnes 
5083a40ed3dSBarry Smith   PetscFunctionBegin;
5093501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
510637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
511637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
512637a0070SStefano Zampini   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
513637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
514637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
515637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
516637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
5173a40ed3dSBarry Smith   PetscFunctionReturn(0);
518096963f5SLois Curfman McInnes }
519096963f5SLois Curfman McInnes 
520dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
5218965ea79SLois Curfman McInnes {
52239ddd567SLois Curfman McInnes   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
523dfbe8321SBarry Smith   PetscErrorCode    ierr;
524637a0070SStefano Zampini   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
52587828ca2SBarry Smith   PetscScalar       *x,zero = 0.0;
526637a0070SStefano Zampini   const PetscScalar *av;
527ed3cc1f0SBarry Smith 
5283a40ed3dSBarry Smith   PetscFunctionBegin;
5292dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
5301ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
531096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
532e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
533d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
534d0f46423SBarry Smith   radd = A->rmap->rstart*m;
535637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
536637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
53744cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
538637a0070SStefano Zampini     x[i] = av[radd + i*lda + i];
539096963f5SLois Curfman McInnes   }
540637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
5411ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5423a40ed3dSBarry Smith   PetscFunctionReturn(0);
5438965ea79SLois Curfman McInnes }
5448965ea79SLois Curfman McInnes 
545dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5468965ea79SLois Curfman McInnes {
5473501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
548dfbe8321SBarry Smith   PetscErrorCode ierr;
549ed3cc1f0SBarry Smith 
5503a40ed3dSBarry Smith   PetscFunctionBegin;
551aa482453SBarry Smith #if defined(PETSC_USE_LOG)
552d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
5538965ea79SLois Curfman McInnes #endif
5548798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
555*616b8fbbSStefano Zampini   if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
556*616b8fbbSStefano Zampini   if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
5576bf464f9SBarry Smith   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
5586bf464f9SBarry Smith   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
559637a0070SStefano Zampini   ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
5606947451fSStefano Zampini   ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
5615ea7661aSPierre Jolivet   ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);
56201b82886SBarry Smith 
563bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
564dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
5658baccfbdSHong Zhang 
56649a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
567ad16ce7aSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
5688baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
5698572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
5708572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
5718572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
5726947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
5736947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
574d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
575d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
576d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
5778baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5788baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
5798baccfbdSHong Zhang #endif
580bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
5814222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5824222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
5836718818eSStefano Zampini #if defined (PETSC_HAVE_CUDA)
5846718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
5856718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
5866718818eSStefano Zampini #endif
58786aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
58886aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
5896947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
5906947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
5916947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
5926947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
5936947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
5946947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
5955ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
5965ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
5973a40ed3dSBarry Smith   PetscFunctionReturn(0);
5988965ea79SLois Curfman McInnes }
59939ddd567SLois Curfman McInnes 
60052c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
60152c5f739Sprj- 
6029804daf3SBarry Smith #include <petscdraw.h>
6036849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
6048965ea79SLois Curfman McInnes {
60539ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
606dfbe8321SBarry Smith   PetscErrorCode    ierr;
607*616b8fbbSStefano Zampini   PetscMPIInt       rank;
60819fd82e9SBarry Smith   PetscViewerType   vtype;
609ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
610b0a32e0cSBarry Smith   PetscViewer       sviewer;
611f3ef73ceSBarry Smith   PetscViewerFormat format;
6128965ea79SLois Curfman McInnes 
6133a40ed3dSBarry Smith   PetscFunctionBegin;
614*616b8fbbSStefano Zampini   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
615251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
616251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
61732077d6dSBarry Smith   if (iascii) {
618b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
619b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6214e220ebcSLois Curfman McInnes       MatInfo info;
622888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
6231575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
6247b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
625b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6261575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
627637a0070SStefano Zampini       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
6283a40ed3dSBarry Smith       PetscFunctionReturn(0);
629fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6303a40ed3dSBarry Smith       PetscFunctionReturn(0);
6318965ea79SLois Curfman McInnes     }
632f1af5d2fSBarry Smith   } else if (isdraw) {
633b0a32e0cSBarry Smith     PetscDraw draw;
634ace3abfcSBarry Smith     PetscBool isnull;
635f1af5d2fSBarry Smith 
636b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
637b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
638f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
639f1af5d2fSBarry Smith   }
64077ed5343SBarry Smith 
6417da1fb6eSBarry Smith   {
6428965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6438965ea79SLois Curfman McInnes     Mat         A;
644d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
645ba8c8a56SBarry Smith     PetscInt    *cols;
646ba8c8a56SBarry Smith     PetscScalar *vals;
6478965ea79SLois Curfman McInnes 
648ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
6498965ea79SLois Curfman McInnes     if (!rank) {
650f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
6513a40ed3dSBarry Smith     } else {
652f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
6538965ea79SLois Curfman McInnes     }
6547adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
655878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
6560298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
6573bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
6588965ea79SLois Curfman McInnes 
65939ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
66039ddd567SLois Curfman McInnes        but it's quick for now */
66151022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6622205254eSKarl Rupp 
6632205254eSKarl Rupp     row = mat->rmap->rstart;
6642205254eSKarl Rupp     m   = mdn->A->rmap->n;
6658965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
666ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
667ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
668ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
66939ddd567SLois Curfman McInnes       row++;
6708965ea79SLois Curfman McInnes     }
6718965ea79SLois Curfman McInnes 
6726d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6736d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6743f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
675b9b97703SBarry Smith     if (!rank) {
6761a9d3c3cSBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
6777da1fb6eSBarry Smith       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
6788965ea79SLois Curfman McInnes     }
6793f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
680b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6816bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
6828965ea79SLois Curfman McInnes   }
6833a40ed3dSBarry Smith   PetscFunctionReturn(0);
6848965ea79SLois Curfman McInnes }
6858965ea79SLois Curfman McInnes 
686dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
6878965ea79SLois Curfman McInnes {
688dfbe8321SBarry Smith   PetscErrorCode ierr;
689ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw,issocket;
6908965ea79SLois Curfman McInnes 
691433994e6SBarry Smith   PetscFunctionBegin;
692251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
693251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
694251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
695251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
6960f5bd95cSBarry Smith 
69732077d6dSBarry Smith   if (iascii || issocket || isdraw) {
698f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
6990f5bd95cSBarry Smith   } else if (isbinary) {
7008491ab44SLisandro Dalcin     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
70111aeaf0aSBarry Smith   }
7023a40ed3dSBarry Smith   PetscFunctionReturn(0);
7038965ea79SLois Curfman McInnes }
7048965ea79SLois Curfman McInnes 
705dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
7068965ea79SLois Curfman McInnes {
7073501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
7083501a2bdSLois Curfman McInnes   Mat            mdn  = mat->A;
709dfbe8321SBarry Smith   PetscErrorCode ierr;
7103966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
7118965ea79SLois Curfman McInnes 
7123a40ed3dSBarry Smith   PetscFunctionBegin;
7134e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7142205254eSKarl Rupp 
7154e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
7162205254eSKarl Rupp 
7174e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
7184e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
7198965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7204e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7214e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7224e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7234e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7244e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7258965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
7263966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7272205254eSKarl Rupp 
7284e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7294e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7304e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7314e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7324e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7338965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
7343966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7352205254eSKarl Rupp 
7364e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7374e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7384e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7394e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7404e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7418965ea79SLois Curfman McInnes   }
7424e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7434e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7444e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7453a40ed3dSBarry Smith   PetscFunctionReturn(0);
7468965ea79SLois Curfman McInnes }
7478965ea79SLois Curfman McInnes 
748ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
7498965ea79SLois Curfman McInnes {
75039ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
751dfbe8321SBarry Smith   PetscErrorCode ierr;
7528965ea79SLois Curfman McInnes 
7533a40ed3dSBarry Smith   PetscFunctionBegin;
75412c028f9SKris Buschelman   switch (op) {
755512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
75612c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
75712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
75843674050SBarry Smith     MatCheckPreallocated(A,1);
7594e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
76012c028f9SKris Buschelman     break;
76112c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
76243674050SBarry Smith     MatCheckPreallocated(A,1);
7634e0d8c25SBarry Smith     a->roworiented = flg;
7644e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
76512c028f9SKris Buschelman     break;
7664e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
76713fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
76812c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
769071fcb05SBarry Smith   case MAT_SORTED_FULL:
770290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
77112c028f9SKris Buschelman     break;
77212c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
7734e0d8c25SBarry Smith     a->donotstash = flg;
77412c028f9SKris Buschelman     break;
77577e54ba9SKris Buschelman   case MAT_SYMMETRIC:
77677e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7779a4540c5SBarry Smith   case MAT_HERMITIAN:
7789a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
779600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
7805d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
781290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
78277e54ba9SKris Buschelman     break;
78312c028f9SKris Buschelman   default:
784e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
7853a40ed3dSBarry Smith   }
7863a40ed3dSBarry Smith   PetscFunctionReturn(0);
7878965ea79SLois Curfman McInnes }
7888965ea79SLois Curfman McInnes 
789dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
7905b2fa520SLois Curfman McInnes {
7915b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
792637a0070SStefano Zampini   const PetscScalar *l;
793637a0070SStefano Zampini   PetscScalar       x,*v,*vv,*r;
794dfbe8321SBarry Smith   PetscErrorCode    ierr;
795637a0070SStefano Zampini   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
7965b2fa520SLois Curfman McInnes 
7975b2fa520SLois Curfman McInnes   PetscFunctionBegin;
798637a0070SStefano Zampini   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
799637a0070SStefano Zampini   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
80072d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
8015b2fa520SLois Curfman McInnes   if (ll) {
80272d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
803637a0070SStefano Zampini     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
804bca11509SBarry Smith     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
8055b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
8065b2fa520SLois Curfman McInnes       x = l[i];
807637a0070SStefano Zampini       v = vv + i;
808637a0070SStefano Zampini       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
8095b2fa520SLois Curfman McInnes     }
810bca11509SBarry Smith     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
811637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
8125b2fa520SLois Curfman McInnes   }
8135b2fa520SLois Curfman McInnes   if (rr) {
814637a0070SStefano Zampini     const PetscScalar *ar;
815637a0070SStefano Zampini 
816175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
817e32f2f54SBarry Smith     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
818637a0070SStefano Zampini     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
819637a0070SStefano Zampini     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
820637a0070SStefano Zampini     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
821637a0070SStefano Zampini     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
822637a0070SStefano Zampini     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
8235b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
8245b2fa520SLois Curfman McInnes       x = r[i];
825637a0070SStefano Zampini       v = vv + i*lda;
8262205254eSKarl Rupp       for (j=0; j<m; j++) (*v++) *= x;
8275b2fa520SLois Curfman McInnes     }
828637a0070SStefano Zampini     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
829637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
8305b2fa520SLois Curfman McInnes   }
831637a0070SStefano Zampini   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
8325b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8335b2fa520SLois Curfman McInnes }
8345b2fa520SLois Curfman McInnes 
835dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
836096963f5SLois Curfman McInnes {
8373501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
838dfbe8321SBarry Smith   PetscErrorCode    ierr;
83913f74950SBarry Smith   PetscInt          i,j;
840*616b8fbbSStefano Zampini   PetscMPIInt       size;
841329f5518SBarry Smith   PetscReal         sum = 0.0;
842637a0070SStefano Zampini   const PetscScalar *av,*v;
8433501a2bdSLois Curfman McInnes 
8443a40ed3dSBarry Smith   PetscFunctionBegin;
845637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
846637a0070SStefano Zampini   v    = av;
847*616b8fbbSStefano Zampini   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
848*616b8fbbSStefano Zampini   if (size == 1) {
849064f8208SBarry Smith     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
8503501a2bdSLois Curfman McInnes   } else {
8513501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
852d0f46423SBarry Smith       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
853329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
8543501a2bdSLois Curfman McInnes       }
855b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
8568f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
857dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
8583a40ed3dSBarry Smith     } else if (type == NORM_1) {
859329f5518SBarry Smith       PetscReal *tmp,*tmp2;
860580bdb30SBarry Smith       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
861064f8208SBarry Smith       *nrm = 0.0;
862637a0070SStefano Zampini       v    = av;
863d0f46423SBarry Smith       for (j=0; j<mdn->A->cmap->n; j++) {
864d0f46423SBarry Smith         for (i=0; i<mdn->A->rmap->n; i++) {
86567e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
8663501a2bdSLois Curfman McInnes         }
8673501a2bdSLois Curfman McInnes       }
868b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
869d0f46423SBarry Smith       for (j=0; j<A->cmap->N; j++) {
870064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8713501a2bdSLois Curfman McInnes       }
8728627564fSBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
873d0f46423SBarry Smith       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
8743a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
875329f5518SBarry Smith       PetscReal ntemp;
8763501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
877b2566f29SBarry Smith       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
878ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
8793501a2bdSLois Curfman McInnes   }
880637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
8813a40ed3dSBarry Smith   PetscFunctionReturn(0);
8823501a2bdSLois Curfman McInnes }
8833501a2bdSLois Curfman McInnes 
884fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
8853501a2bdSLois Curfman McInnes {
8863501a2bdSLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
8873501a2bdSLois Curfman McInnes   Mat            B;
888d0f46423SBarry Smith   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
8896849ba73SBarry Smith   PetscErrorCode ierr;
890637a0070SStefano Zampini   PetscInt       j,i,lda;
89187828ca2SBarry Smith   PetscScalar    *v;
8923501a2bdSLois Curfman McInnes 
8933a40ed3dSBarry Smith   PetscFunctionBegin;
894cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
895ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
896d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
8977adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
8980298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
899637a0070SStefano Zampini   } else B = *matout;
9003501a2bdSLois Curfman McInnes 
901637a0070SStefano Zampini   m    = a->A->rmap->n; n = a->A->cmap->n;
902637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
903637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
904785e854fSJed Brown   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
9053501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
9061acff37aSSatish Balay   for (j=0; j<n; j++) {
9073501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
908637a0070SStefano Zampini     v   += lda;
9093501a2bdSLois Curfman McInnes   }
910637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
911606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
9126d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9136d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
914cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9153501a2bdSLois Curfman McInnes     *matout = B;
9163501a2bdSLois Curfman McInnes   } else {
91728be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
9183501a2bdSLois Curfman McInnes   }
9193a40ed3dSBarry Smith   PetscFunctionReturn(0);
920096963f5SLois Curfman McInnes }
921096963f5SLois Curfman McInnes 
9226849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
92352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
9248965ea79SLois Curfman McInnes 
9254994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A)
926273d9f13SBarry Smith {
927dfbe8321SBarry Smith   PetscErrorCode ierr;
928273d9f13SBarry Smith 
929273d9f13SBarry Smith   PetscFunctionBegin;
93018992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
93118992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
93218992e5dSStefano Zampini   if (!A->preallocated) {
933273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
93418992e5dSStefano Zampini   }
935273d9f13SBarry Smith   PetscFunctionReturn(0);
936273d9f13SBarry Smith }
937273d9f13SBarry Smith 
938488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
939488007eeSBarry Smith {
940488007eeSBarry Smith   PetscErrorCode ierr;
941488007eeSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
942488007eeSBarry Smith 
943488007eeSBarry Smith   PetscFunctionBegin;
944488007eeSBarry Smith   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
945488007eeSBarry Smith   PetscFunctionReturn(0);
946488007eeSBarry Smith }
947488007eeSBarry Smith 
9487087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat)
949ba337c44SJed Brown {
950ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
951ba337c44SJed Brown   PetscErrorCode ierr;
952ba337c44SJed Brown 
953ba337c44SJed Brown   PetscFunctionBegin;
954ba337c44SJed Brown   ierr = MatConjugate(a->A);CHKERRQ(ierr);
955ba337c44SJed Brown   PetscFunctionReturn(0);
956ba337c44SJed Brown }
957ba337c44SJed Brown 
958ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A)
959ba337c44SJed Brown {
960ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
961ba337c44SJed Brown   PetscErrorCode ierr;
962ba337c44SJed Brown 
963ba337c44SJed Brown   PetscFunctionBegin;
964ba337c44SJed Brown   ierr = MatRealPart(a->A);CHKERRQ(ierr);
965ba337c44SJed Brown   PetscFunctionReturn(0);
966ba337c44SJed Brown }
967ba337c44SJed Brown 
968ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
969ba337c44SJed Brown {
970ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
971ba337c44SJed Brown   PetscErrorCode ierr;
972ba337c44SJed Brown 
973ba337c44SJed Brown   PetscFunctionBegin;
974ba337c44SJed Brown   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
975ba337c44SJed Brown   PetscFunctionReturn(0);
976ba337c44SJed Brown }
977ba337c44SJed Brown 
97849a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
97949a6ff4bSBarry Smith {
98049a6ff4bSBarry Smith   PetscErrorCode ierr;
981637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
98249a6ff4bSBarry Smith 
98349a6ff4bSBarry Smith   PetscFunctionBegin;
984637a0070SStefano Zampini   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
985637a0070SStefano Zampini   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
986637a0070SStefano Zampini   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
98749a6ff4bSBarry Smith   PetscFunctionReturn(0);
98849a6ff4bSBarry Smith }
98949a6ff4bSBarry Smith 
99052c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
99152c5f739Sprj- 
9920716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
9930716a85fSBarry Smith {
9940716a85fSBarry Smith   PetscErrorCode ierr;
9950716a85fSBarry Smith   PetscInt       i,n;
9960716a85fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
9970716a85fSBarry Smith   PetscReal      *work;
9980716a85fSBarry Smith 
9990716a85fSBarry Smith   PetscFunctionBegin;
10000298fd71SBarry Smith   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1001785e854fSJed Brown   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
10020716a85fSBarry Smith   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
10030716a85fSBarry Smith   if (type == NORM_2) {
10040716a85fSBarry Smith     for (i=0; i<n; i++) work[i] *= work[i];
10050716a85fSBarry Smith   }
10060716a85fSBarry Smith   if (type == NORM_INFINITY) {
1007b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
10080716a85fSBarry Smith   } else {
1009b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
10100716a85fSBarry Smith   }
10110716a85fSBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
10120716a85fSBarry Smith   if (type == NORM_2) {
10138f1a2a5eSBarry Smith     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
10140716a85fSBarry Smith   }
10150716a85fSBarry Smith   PetscFunctionReturn(0);
10160716a85fSBarry Smith }
10170716a85fSBarry Smith 
1018637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
10196947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10206947451fSStefano Zampini {
10216947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10226947451fSStefano Zampini   PetscErrorCode ierr;
10236947451fSStefano Zampini   PetscInt       lda;
10246947451fSStefano Zampini 
10256947451fSStefano Zampini   PetscFunctionBegin;
1026*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1027*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
10286947451fSStefano Zampini   if (!a->cvec) {
10296947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1030*616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
10316947451fSStefano Zampini   }
10326947451fSStefano Zampini   a->vecinuse = col + 1;
10336947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
10346947451fSStefano Zampini   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
10356947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
10366947451fSStefano Zampini   *v   = a->cvec;
10376947451fSStefano Zampini   PetscFunctionReturn(0);
10386947451fSStefano Zampini }
10396947451fSStefano Zampini 
10406947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10416947451fSStefano Zampini {
10426947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10436947451fSStefano Zampini   PetscErrorCode ierr;
10446947451fSStefano Zampini 
10456947451fSStefano Zampini   PetscFunctionBegin;
10465ea7661aSPierre Jolivet   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
10476947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
10486947451fSStefano Zampini   a->vecinuse = 0;
10496947451fSStefano Zampini   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
10506947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
10516947451fSStefano Zampini   *v   = NULL;
10526947451fSStefano Zampini   PetscFunctionReturn(0);
10536947451fSStefano Zampini }
10546947451fSStefano Zampini 
10556947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10566947451fSStefano Zampini {
10576947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10586947451fSStefano Zampini   PetscInt       lda;
10596947451fSStefano Zampini   PetscErrorCode ierr;
10606947451fSStefano Zampini 
10616947451fSStefano Zampini   PetscFunctionBegin;
1062*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1063*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
10646947451fSStefano Zampini   if (!a->cvec) {
10656947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1066*616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
10676947451fSStefano Zampini   }
10686947451fSStefano Zampini   a->vecinuse = col + 1;
10696947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
10706947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
10716947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
10726947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
10736947451fSStefano Zampini   *v   = a->cvec;
10746947451fSStefano Zampini   PetscFunctionReturn(0);
10756947451fSStefano Zampini }
10766947451fSStefano Zampini 
10776947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10786947451fSStefano Zampini {
10796947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10806947451fSStefano Zampini   PetscErrorCode ierr;
10816947451fSStefano Zampini 
10826947451fSStefano Zampini   PetscFunctionBegin;
1083*616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1084*616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
10856947451fSStefano Zampini   a->vecinuse = 0;
10866947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
10876947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
10886947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
10896947451fSStefano Zampini   *v   = NULL;
10906947451fSStefano Zampini   PetscFunctionReturn(0);
10916947451fSStefano Zampini }
10926947451fSStefano Zampini 
10936947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10946947451fSStefano Zampini {
10956947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10966947451fSStefano Zampini   PetscErrorCode ierr;
10976947451fSStefano Zampini   PetscInt       lda;
10986947451fSStefano Zampini 
10996947451fSStefano Zampini   PetscFunctionBegin;
1100*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1101*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
11026947451fSStefano Zampini   if (!a->cvec) {
11036947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1104*616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
11056947451fSStefano Zampini   }
11066947451fSStefano Zampini   a->vecinuse = col + 1;
11076947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
11086947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
11096947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
11106947451fSStefano Zampini   *v   = a->cvec;
11116947451fSStefano Zampini   PetscFunctionReturn(0);
11126947451fSStefano Zampini }
11136947451fSStefano Zampini 
11146947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
11156947451fSStefano Zampini {
11166947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
11176947451fSStefano Zampini   PetscErrorCode ierr;
11186947451fSStefano Zampini 
11196947451fSStefano Zampini   PetscFunctionBegin;
1120*616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1121*616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
11226947451fSStefano Zampini   a->vecinuse = 0;
11236947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
11246947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
11256947451fSStefano Zampini   *v   = NULL;
11266947451fSStefano Zampini   PetscFunctionReturn(0);
11276947451fSStefano Zampini }
11286947451fSStefano Zampini 
1129637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1130637a0070SStefano Zampini {
1131637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1132637a0070SStefano Zampini   PetscErrorCode ierr;
1133637a0070SStefano Zampini 
1134637a0070SStefano Zampini   PetscFunctionBegin;
1135*616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1136*616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1137637a0070SStefano Zampini   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1138637a0070SStefano Zampini   PetscFunctionReturn(0);
1139637a0070SStefano Zampini }
1140637a0070SStefano Zampini 
1141637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1142637a0070SStefano Zampini {
1143637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1144637a0070SStefano Zampini   PetscErrorCode ierr;
1145637a0070SStefano Zampini 
1146637a0070SStefano Zampini   PetscFunctionBegin;
1147*616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1148*616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1149637a0070SStefano Zampini   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1150637a0070SStefano Zampini   PetscFunctionReturn(0);
1151637a0070SStefano Zampini }
1152637a0070SStefano Zampini 
1153d5ea218eSStefano Zampini static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1154d5ea218eSStefano Zampini {
1155d5ea218eSStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1156d5ea218eSStefano Zampini   PetscErrorCode ierr;
1157d5ea218eSStefano Zampini 
1158d5ea218eSStefano Zampini   PetscFunctionBegin;
1159*616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1160*616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1161d5ea218eSStefano Zampini   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1162d5ea218eSStefano Zampini   PetscFunctionReturn(0);
1163d5ea218eSStefano Zampini }
1164d5ea218eSStefano Zampini 
1165637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1166637a0070SStefano Zampini {
1167637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1168637a0070SStefano Zampini   PetscErrorCode ierr;
1169637a0070SStefano Zampini 
1170637a0070SStefano Zampini   PetscFunctionBegin;
1171637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1172637a0070SStefano Zampini   PetscFunctionReturn(0);
1173637a0070SStefano Zampini }
1174637a0070SStefano Zampini 
1175637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1176637a0070SStefano Zampini {
1177637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1178637a0070SStefano Zampini   PetscErrorCode ierr;
1179637a0070SStefano Zampini 
1180637a0070SStefano Zampini   PetscFunctionBegin;
1181637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1182637a0070SStefano Zampini   PetscFunctionReturn(0);
1183637a0070SStefano Zampini }
1184637a0070SStefano Zampini 
1185637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1186637a0070SStefano Zampini {
1187637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1188637a0070SStefano Zampini   PetscErrorCode ierr;
1189637a0070SStefano Zampini 
1190637a0070SStefano Zampini   PetscFunctionBegin;
1191637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1192637a0070SStefano Zampini   PetscFunctionReturn(0);
1193637a0070SStefano Zampini }
1194637a0070SStefano Zampini 
1195637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1196637a0070SStefano Zampini {
1197637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1198637a0070SStefano Zampini   PetscErrorCode ierr;
1199637a0070SStefano Zampini 
1200637a0070SStefano Zampini   PetscFunctionBegin;
1201637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1202637a0070SStefano Zampini   PetscFunctionReturn(0);
1203637a0070SStefano Zampini }
1204637a0070SStefano Zampini 
1205637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1206637a0070SStefano Zampini {
1207637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1208637a0070SStefano Zampini   PetscErrorCode ierr;
1209637a0070SStefano Zampini 
1210637a0070SStefano Zampini   PetscFunctionBegin;
1211637a0070SStefano Zampini   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1212637a0070SStefano Zampini   PetscFunctionReturn(0);
1213637a0070SStefano Zampini }
1214637a0070SStefano Zampini 
1215637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1216637a0070SStefano Zampini {
1217637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1218637a0070SStefano Zampini   PetscErrorCode ierr;
1219637a0070SStefano Zampini 
1220637a0070SStefano Zampini   PetscFunctionBegin;
1221637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1222637a0070SStefano Zampini   PetscFunctionReturn(0);
1223637a0070SStefano Zampini }
1224637a0070SStefano Zampini 
12256947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
12266947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
12276947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
12286947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
12296947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
12306947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
12315ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
12326947451fSStefano Zampini 
1233637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1234637a0070SStefano Zampini {
1235637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1236637a0070SStefano Zampini   PetscErrorCode ierr;
1237637a0070SStefano Zampini 
1238637a0070SStefano Zampini   PetscFunctionBegin;
1239*616b8fbbSStefano Zampini   if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1240*616b8fbbSStefano Zampini   if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1241637a0070SStefano Zampini   if (d->A) {
1242637a0070SStefano Zampini     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1243637a0070SStefano Zampini   }
1244637a0070SStefano Zampini   mat->boundtocpu = bind;
12456947451fSStefano Zampini   if (!bind) {
12466947451fSStefano Zampini     PetscBool iscuda;
12476947451fSStefano Zampini 
12486947451fSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
12496947451fSStefano Zampini     if (!iscuda) {
12506947451fSStefano Zampini       ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
1251*616b8fbbSStefano Zampini     }
1252*616b8fbbSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1253*616b8fbbSStefano Zampini     if (!iscuda) {
12545ea7661aSPierre Jolivet       ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
12556947451fSStefano Zampini     }
12566947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
12576947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
12586947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
12596947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
12606947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
12616947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
12626947451fSStefano Zampini   } else {
12636947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
12646947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
12656947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
12666947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
12676947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
12686947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1269*616b8fbbSStefano Zampini   }
1270*616b8fbbSStefano Zampini   if (d->cmat) {
1271*616b8fbbSStefano Zampini     ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
12726947451fSStefano Zampini   }
1273637a0070SStefano Zampini   PetscFunctionReturn(0);
1274637a0070SStefano Zampini }
1275637a0070SStefano Zampini 
1276637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1277637a0070SStefano Zampini {
1278637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1279637a0070SStefano Zampini   PetscErrorCode ierr;
1280637a0070SStefano Zampini   PetscBool      iscuda;
1281637a0070SStefano Zampini 
1282637a0070SStefano Zampini   PetscFunctionBegin;
1283d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1284637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1285637a0070SStefano Zampini   if (!iscuda) PetscFunctionReturn(0);
1286637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1287637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1288637a0070SStefano Zampini   if (!d->A) {
1289637a0070SStefano Zampini     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1290637a0070SStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1291637a0070SStefano Zampini     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1292637a0070SStefano Zampini   }
1293637a0070SStefano Zampini   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1294637a0070SStefano Zampini   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1295637a0070SStefano Zampini   A->preallocated = PETSC_TRUE;
1296637a0070SStefano Zampini   PetscFunctionReturn(0);
1297637a0070SStefano Zampini }
1298637a0070SStefano Zampini #endif
1299637a0070SStefano Zampini 
130073a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
130173a71a0fSBarry Smith {
130273a71a0fSBarry Smith   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
130373a71a0fSBarry Smith   PetscErrorCode ierr;
130473a71a0fSBarry Smith 
130573a71a0fSBarry Smith   PetscFunctionBegin;
1306637a0070SStefano Zampini   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
130773a71a0fSBarry Smith   PetscFunctionReturn(0);
130873a71a0fSBarry Smith }
130973a71a0fSBarry Smith 
13103b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
13113b49f96aSBarry Smith {
13123b49f96aSBarry Smith   PetscFunctionBegin;
13133b49f96aSBarry Smith   *missing = PETSC_FALSE;
13143b49f96aSBarry Smith   PetscFunctionReturn(0);
13153b49f96aSBarry Smith }
13163b49f96aSBarry Smith 
13174222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1318cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
13196718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
13206718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
13216718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
13226718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1323cc48ffa7SToby Isaac 
13248965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
132509dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
132609dc0095SBarry Smith                                         MatGetRow_MPIDense,
132709dc0095SBarry Smith                                         MatRestoreRow_MPIDense,
132809dc0095SBarry Smith                                         MatMult_MPIDense,
132997304618SKris Buschelman                                 /*  4*/ MatMultAdd_MPIDense,
13307c922b88SBarry Smith                                         MatMultTranspose_MPIDense,
13317c922b88SBarry Smith                                         MatMultTransposeAdd_MPIDense,
13328965ea79SLois Curfman McInnes                                         0,
133309dc0095SBarry Smith                                         0,
133409dc0095SBarry Smith                                         0,
133597304618SKris Buschelman                                 /* 10*/ 0,
133609dc0095SBarry Smith                                         0,
133709dc0095SBarry Smith                                         0,
133809dc0095SBarry Smith                                         0,
133909dc0095SBarry Smith                                         MatTranspose_MPIDense,
134097304618SKris Buschelman                                 /* 15*/ MatGetInfo_MPIDense,
13416e4ee0c6SHong Zhang                                         MatEqual_MPIDense,
134209dc0095SBarry Smith                                         MatGetDiagonal_MPIDense,
13435b2fa520SLois Curfman McInnes                                         MatDiagonalScale_MPIDense,
134409dc0095SBarry Smith                                         MatNorm_MPIDense,
134597304618SKris Buschelman                                 /* 20*/ MatAssemblyBegin_MPIDense,
134609dc0095SBarry Smith                                         MatAssemblyEnd_MPIDense,
134709dc0095SBarry Smith                                         MatSetOption_MPIDense,
134809dc0095SBarry Smith                                         MatZeroEntries_MPIDense,
1349d519adbfSMatthew Knepley                                 /* 24*/ MatZeroRows_MPIDense,
1350919b68f7SBarry Smith                                         0,
135101b82886SBarry Smith                                         0,
135201b82886SBarry Smith                                         0,
135301b82886SBarry Smith                                         0,
13544994cf47SJed Brown                                 /* 29*/ MatSetUp_MPIDense,
1355273d9f13SBarry Smith                                         0,
135609dc0095SBarry Smith                                         0,
1357c56a70eeSBarry Smith                                         MatGetDiagonalBlock_MPIDense,
13588c778c55SBarry Smith                                         0,
1359d519adbfSMatthew Knepley                                 /* 34*/ MatDuplicate_MPIDense,
136009dc0095SBarry Smith                                         0,
136109dc0095SBarry Smith                                         0,
136209dc0095SBarry Smith                                         0,
136309dc0095SBarry Smith                                         0,
1364d519adbfSMatthew Knepley                                 /* 39*/ MatAXPY_MPIDense,
13657dae84e0SHong Zhang                                         MatCreateSubMatrices_MPIDense,
136609dc0095SBarry Smith                                         0,
136709dc0095SBarry Smith                                         MatGetValues_MPIDense,
136809dc0095SBarry Smith                                         0,
1369d519adbfSMatthew Knepley                                 /* 44*/ 0,
137009dc0095SBarry Smith                                         MatScale_MPIDense,
13717d68702bSBarry Smith                                         MatShift_Basic,
137209dc0095SBarry Smith                                         0,
137309dc0095SBarry Smith                                         0,
137473a71a0fSBarry Smith                                 /* 49*/ MatSetRandom_MPIDense,
137509dc0095SBarry Smith                                         0,
137609dc0095SBarry Smith                                         0,
137709dc0095SBarry Smith                                         0,
137809dc0095SBarry Smith                                         0,
1379d519adbfSMatthew Knepley                                 /* 54*/ 0,
138009dc0095SBarry Smith                                         0,
138109dc0095SBarry Smith                                         0,
138209dc0095SBarry Smith                                         0,
138309dc0095SBarry Smith                                         0,
13847dae84e0SHong Zhang                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1385b9b97703SBarry Smith                                         MatDestroy_MPIDense,
1386b9b97703SBarry Smith                                         MatView_MPIDense,
1387357abbc8SBarry Smith                                         0,
138897304618SKris Buschelman                                         0,
1389d519adbfSMatthew Knepley                                 /* 64*/ 0,
139097304618SKris Buschelman                                         0,
139197304618SKris Buschelman                                         0,
139297304618SKris Buschelman                                         0,
139397304618SKris Buschelman                                         0,
1394d519adbfSMatthew Knepley                                 /* 69*/ 0,
139597304618SKris Buschelman                                         0,
139697304618SKris Buschelman                                         0,
139797304618SKris Buschelman                                         0,
139897304618SKris Buschelman                                         0,
1399d519adbfSMatthew Knepley                                 /* 74*/ 0,
140097304618SKris Buschelman                                         0,
140197304618SKris Buschelman                                         0,
140297304618SKris Buschelman                                         0,
140397304618SKris Buschelman                                         0,
1404d519adbfSMatthew Knepley                                 /* 79*/ 0,
140597304618SKris Buschelman                                         0,
140697304618SKris Buschelman                                         0,
140797304618SKris Buschelman                                         0,
14085bba2384SShri Abhyankar                                 /* 83*/ MatLoad_MPIDense,
1409865e5f61SKris Buschelman                                         0,
1410865e5f61SKris Buschelman                                         0,
1411865e5f61SKris Buschelman                                         0,
1412865e5f61SKris Buschelman                                         0,
1413865e5f61SKris Buschelman                                         0,
14144222ddf1SHong Zhang                                 /* 89*/ 0,
14154222ddf1SHong Zhang                                         0,
14166718818eSStefano Zampini                                         0,
14172fbe02b9SBarry Smith                                         0,
1418ba337c44SJed Brown                                         0,
1419d519adbfSMatthew Knepley                                 /* 94*/ 0,
14204222ddf1SHong Zhang                                         0,
14216718818eSStefano Zampini                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1422cc48ffa7SToby Isaac                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1423ba337c44SJed Brown                                         0,
14244222ddf1SHong Zhang                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1425ba337c44SJed Brown                                         0,
1426ba337c44SJed Brown                                         0,
1427ba337c44SJed Brown                                         MatConjugate_MPIDense,
1428ba337c44SJed Brown                                         0,
1429ba337c44SJed Brown                                 /*104*/ 0,
1430ba337c44SJed Brown                                         MatRealPart_MPIDense,
1431ba337c44SJed Brown                                         MatImaginaryPart_MPIDense,
143286d161a7SShri Abhyankar                                         0,
143386d161a7SShri Abhyankar                                         0,
143486d161a7SShri Abhyankar                                 /*109*/ 0,
143586d161a7SShri Abhyankar                                         0,
143686d161a7SShri Abhyankar                                         0,
143749a6ff4bSBarry Smith                                         MatGetColumnVector_MPIDense,
14383b49f96aSBarry Smith                                         MatMissingDiagonal_MPIDense,
143986d161a7SShri Abhyankar                                 /*114*/ 0,
144086d161a7SShri Abhyankar                                         0,
144186d161a7SShri Abhyankar                                         0,
144286d161a7SShri Abhyankar                                         0,
144386d161a7SShri Abhyankar                                         0,
144486d161a7SShri Abhyankar                                 /*119*/ 0,
144586d161a7SShri Abhyankar                                         0,
144686d161a7SShri Abhyankar                                         0,
14470716a85fSBarry Smith                                         0,
14480716a85fSBarry Smith                                         0,
14490716a85fSBarry Smith                                 /*124*/ 0,
14503964eb88SJed Brown                                         MatGetColumnNorms_MPIDense,
14513964eb88SJed Brown                                         0,
14523964eb88SJed Brown                                         0,
14533964eb88SJed Brown                                         0,
14543964eb88SJed Brown                                 /*129*/ 0,
14554222ddf1SHong Zhang                                         0,
14566718818eSStefano Zampini                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1457cb20be35SHong Zhang                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
14583964eb88SJed Brown                                         0,
14593964eb88SJed Brown                                 /*134*/ 0,
14603964eb88SJed Brown                                         0,
14613964eb88SJed Brown                                         0,
14623964eb88SJed Brown                                         0,
14633964eb88SJed Brown                                         0,
14643964eb88SJed Brown                                 /*139*/ 0,
1465f9426fe0SMark Adams                                         0,
146694e2cb23SJakub Kruzik                                         0,
146794e2cb23SJakub Kruzik                                         0,
146894e2cb23SJakub Kruzik                                         0,
14694222ddf1SHong Zhang                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
14704222ddf1SHong Zhang                                 /*145*/ 0,
14714222ddf1SHong Zhang                                         0,
14724222ddf1SHong Zhang                                         0
1473ba337c44SJed Brown };
14748965ea79SLois Curfman McInnes 
14757087cfbeSBarry Smith PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1476a23d5eceSKris Buschelman {
1477637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1478637a0070SStefano Zampini   PetscBool      iscuda;
1479dfbe8321SBarry Smith   PetscErrorCode ierr;
1480a23d5eceSKris Buschelman 
1481a23d5eceSKris Buschelman   PetscFunctionBegin;
1482*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
148334ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
148434ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1485637a0070SStefano Zampini   if (!a->A) {
1486f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
14873bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1488637a0070SStefano Zampini     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1489637a0070SStefano Zampini   }
1490637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1491637a0070SStefano Zampini   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1492637a0070SStefano Zampini   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1493637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
1494a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1495a23d5eceSKris Buschelman }
1496a23d5eceSKris Buschelman 
149765b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1498cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
14998baccfbdSHong Zhang {
15008ea901baSHong Zhang   Mat            mat_elemental;
15018ea901baSHong Zhang   PetscErrorCode ierr;
150232d7a744SHong Zhang   PetscScalar    *v;
150332d7a744SHong Zhang   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
15048ea901baSHong Zhang 
15058baccfbdSHong Zhang   PetscFunctionBegin;
1506378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1507378336b6SHong Zhang     mat_elemental = *newmat;
1508378336b6SHong Zhang     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1509378336b6SHong Zhang   } else {
1510378336b6SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1511378336b6SHong Zhang     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1512378336b6SHong Zhang     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1513378336b6SHong Zhang     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
151432d7a744SHong Zhang     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1515378336b6SHong Zhang   }
1516378336b6SHong Zhang 
151732d7a744SHong Zhang   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
151832d7a744SHong Zhang   for (i=0; i<N; i++) cols[i] = i;
151932d7a744SHong Zhang   for (i=0; i<m; i++) rows[i] = rstart + i;
15208ea901baSHong Zhang 
1521637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
152232d7a744SHong Zhang   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
152332d7a744SHong Zhang   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
15248ea901baSHong Zhang   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15258ea901baSHong Zhang   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152632d7a744SHong Zhang   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
152732d7a744SHong Zhang   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
15288ea901baSHong Zhang 
1529511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
153028be2f97SBarry Smith     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
15318ea901baSHong Zhang   } else {
15328ea901baSHong Zhang     *newmat = mat_elemental;
15338ea901baSHong Zhang   }
15348baccfbdSHong Zhang   PetscFunctionReturn(0);
15358baccfbdSHong Zhang }
153665b80a83SHong Zhang #endif
15378baccfbdSHong Zhang 
1538af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
153986aefd0dSHong Zhang {
154086aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
154186aefd0dSHong Zhang   PetscErrorCode ierr;
154286aefd0dSHong Zhang 
154386aefd0dSHong Zhang   PetscFunctionBegin;
154486aefd0dSHong Zhang   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
154586aefd0dSHong Zhang   PetscFunctionReturn(0);
154686aefd0dSHong Zhang }
154786aefd0dSHong Zhang 
1548af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
154986aefd0dSHong Zhang {
155086aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
155186aefd0dSHong Zhang   PetscErrorCode ierr;
155286aefd0dSHong Zhang 
155386aefd0dSHong Zhang   PetscFunctionBegin;
155486aefd0dSHong Zhang   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
155586aefd0dSHong Zhang   PetscFunctionReturn(0);
155686aefd0dSHong Zhang }
155786aefd0dSHong Zhang 
155894e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
155994e2cb23SJakub Kruzik {
156094e2cb23SJakub Kruzik   PetscErrorCode ierr;
156194e2cb23SJakub Kruzik   Mat_MPIDense   *mat;
156294e2cb23SJakub Kruzik   PetscInt       m,nloc,N;
156394e2cb23SJakub Kruzik 
156494e2cb23SJakub Kruzik   PetscFunctionBegin;
156594e2cb23SJakub Kruzik   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
156694e2cb23SJakub Kruzik   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
156794e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
156894e2cb23SJakub Kruzik     PetscInt sum;
156994e2cb23SJakub Kruzik 
157094e2cb23SJakub Kruzik     if (n == PETSC_DECIDE) {
157194e2cb23SJakub Kruzik       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
157294e2cb23SJakub Kruzik     }
157394e2cb23SJakub Kruzik     /* Check sum(n) = N */
157494e2cb23SJakub Kruzik     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
157594e2cb23SJakub Kruzik     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
157694e2cb23SJakub Kruzik 
157794e2cb23SJakub Kruzik     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
157894e2cb23SJakub Kruzik   }
157994e2cb23SJakub Kruzik 
158094e2cb23SJakub Kruzik   /* numeric phase */
158194e2cb23SJakub Kruzik   mat = (Mat_MPIDense*)(*outmat)->data;
158294e2cb23SJakub Kruzik   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
158394e2cb23SJakub Kruzik   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158494e2cb23SJakub Kruzik   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158594e2cb23SJakub Kruzik   PetscFunctionReturn(0);
158694e2cb23SJakub Kruzik }
158794e2cb23SJakub Kruzik 
1588637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1589637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1590637a0070SStefano Zampini {
1591637a0070SStefano Zampini   Mat            B;
1592637a0070SStefano Zampini   Mat_MPIDense   *m;
1593637a0070SStefano Zampini   PetscErrorCode ierr;
1594637a0070SStefano Zampini 
1595637a0070SStefano Zampini   PetscFunctionBegin;
1596637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1597637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1598637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1599637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1600637a0070SStefano Zampini   }
1601637a0070SStefano Zampini 
1602637a0070SStefano Zampini   B    = *newmat;
1603637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1604637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1605637a0070SStefano Zampini   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1606637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1607637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1608637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
16096718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
1610637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
16116718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1612637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1613637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1614637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1615637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1616637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1617637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1618637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1619637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1620d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
1621637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1622637a0070SStefano Zampini   if (m->A) {
1623637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1624637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1625637a0070SStefano Zampini   }
1626637a0070SStefano Zampini   B->ops->bindtocpu = NULL;
1627637a0070SStefano Zampini   B->offloadmask    = PETSC_OFFLOAD_CPU;
1628637a0070SStefano Zampini   PetscFunctionReturn(0);
1629637a0070SStefano Zampini }
1630637a0070SStefano Zampini 
1631637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1632637a0070SStefano Zampini {
1633637a0070SStefano Zampini   Mat            B;
1634637a0070SStefano Zampini   Mat_MPIDense   *m;
1635637a0070SStefano Zampini   PetscErrorCode ierr;
1636637a0070SStefano Zampini 
1637637a0070SStefano Zampini   PetscFunctionBegin;
1638637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1639637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1640637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1641637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1642637a0070SStefano Zampini   }
1643637a0070SStefano Zampini 
1644637a0070SStefano Zampini   B    = *newmat;
1645637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1646637a0070SStefano Zampini   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1647637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1648637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1649637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
16506718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1651637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
16526718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1653637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1654637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1655637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1656637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1657637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1658637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1659637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1660637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1661d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1662637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1663637a0070SStefano Zampini   if (m->A) {
1664637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1665637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1666637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_BOTH;
1667637a0070SStefano Zampini   } else {
1668637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1669637a0070SStefano Zampini   }
1670637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1671637a0070SStefano Zampini 
1672637a0070SStefano Zampini   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1673637a0070SStefano Zampini   PetscFunctionReturn(0);
1674637a0070SStefano Zampini }
1675637a0070SStefano Zampini #endif
1676637a0070SStefano Zampini 
16776947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
16786947451fSStefano Zampini {
16796947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
16806947451fSStefano Zampini   PetscErrorCode ierr;
16816947451fSStefano Zampini   PetscInt       lda;
16826947451fSStefano Zampini 
16836947451fSStefano Zampini   PetscFunctionBegin;
1684*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1685*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
16866947451fSStefano Zampini   if (!a->cvec) {
16876947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
16886947451fSStefano Zampini   }
16896947451fSStefano Zampini   a->vecinuse = col + 1;
16906947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
16916947451fSStefano Zampini   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
16926947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
16936947451fSStefano Zampini   *v   = a->cvec;
16946947451fSStefano Zampini   PetscFunctionReturn(0);
16956947451fSStefano Zampini }
16966947451fSStefano Zampini 
16976947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
16986947451fSStefano Zampini {
16996947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17006947451fSStefano Zampini   PetscErrorCode ierr;
17016947451fSStefano Zampini 
17026947451fSStefano Zampini   PetscFunctionBegin;
17035ea7661aSPierre Jolivet   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
17046947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
17056947451fSStefano Zampini   a->vecinuse = 0;
17066947451fSStefano Zampini   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17076947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17086947451fSStefano Zampini   *v   = NULL;
17096947451fSStefano Zampini   PetscFunctionReturn(0);
17106947451fSStefano Zampini }
17116947451fSStefano Zampini 
17126947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
17136947451fSStefano Zampini {
17146947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17156947451fSStefano Zampini   PetscErrorCode ierr;
17166947451fSStefano Zampini   PetscInt       lda;
17176947451fSStefano Zampini 
17186947451fSStefano Zampini   PetscFunctionBegin;
1719*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1720*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17216947451fSStefano Zampini   if (!a->cvec) {
17226947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
17236947451fSStefano Zampini   }
17246947451fSStefano Zampini   a->vecinuse = col + 1;
17256947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
17266947451fSStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
17276947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
17286947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
17296947451fSStefano Zampini   *v   = a->cvec;
17306947451fSStefano Zampini   PetscFunctionReturn(0);
17316947451fSStefano Zampini }
17326947451fSStefano Zampini 
17336947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
17346947451fSStefano Zampini {
17356947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17366947451fSStefano Zampini   PetscErrorCode ierr;
17376947451fSStefano Zampini 
17386947451fSStefano Zampini   PetscFunctionBegin;
1739*616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1740*616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
17416947451fSStefano Zampini   a->vecinuse = 0;
17426947451fSStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
17436947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
17446947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17456947451fSStefano Zampini   *v   = NULL;
17466947451fSStefano Zampini   PetscFunctionReturn(0);
17476947451fSStefano Zampini }
17486947451fSStefano Zampini 
17496947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
17506947451fSStefano Zampini {
17516947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17526947451fSStefano Zampini   PetscErrorCode ierr;
17536947451fSStefano Zampini   PetscInt       lda;
17546947451fSStefano Zampini 
17556947451fSStefano Zampini   PetscFunctionBegin;
1756*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1757*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17586947451fSStefano Zampini   if (!a->cvec) {
17596947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
17606947451fSStefano Zampini   }
17616947451fSStefano Zampini   a->vecinuse = col + 1;
17626947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
17636947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17646947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
17656947451fSStefano Zampini   *v   = a->cvec;
17666947451fSStefano Zampini   PetscFunctionReturn(0);
17676947451fSStefano Zampini }
17686947451fSStefano Zampini 
17696947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
17706947451fSStefano Zampini {
17716947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17726947451fSStefano Zampini   PetscErrorCode ierr;
17736947451fSStefano Zampini 
17746947451fSStefano Zampini   PetscFunctionBegin;
1775*616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1776*616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
17776947451fSStefano Zampini   a->vecinuse = 0;
17786947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17796947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17806947451fSStefano Zampini   *v   = NULL;
17816947451fSStefano Zampini   PetscFunctionReturn(0);
17826947451fSStefano Zampini }
17836947451fSStefano Zampini 
17845ea7661aSPierre Jolivet PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
17855ea7661aSPierre Jolivet {
17865ea7661aSPierre Jolivet   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1787*616b8fbbSStefano Zampini   Mat_MPIDense   *c;
17885ea7661aSPierre Jolivet   PetscErrorCode ierr;
1789*616b8fbbSStefano Zampini   MPI_Comm       comm;
1790*616b8fbbSStefano Zampini   PetscBool      setup = PETSC_FALSE;
17915ea7661aSPierre Jolivet 
17925ea7661aSPierre Jolivet   PetscFunctionBegin;
1793*616b8fbbSStefano Zampini   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1794*616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1795*616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17965ea7661aSPierre Jolivet   if (!a->cmat) {
1797*616b8fbbSStefano Zampini     setup = PETSC_TRUE;
1798*616b8fbbSStefano Zampini     ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
1799*616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
1800*616b8fbbSStefano Zampini     ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1801*616b8fbbSStefano Zampini     ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
1802*616b8fbbSStefano Zampini     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1803*616b8fbbSStefano Zampini     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1804*616b8fbbSStefano Zampini   } else if (cend-cbegin != a->cmat->cmap->N) {
1805*616b8fbbSStefano Zampini     setup = PETSC_TRUE;
1806*616b8fbbSStefano Zampini     ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
1807*616b8fbbSStefano Zampini     ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
1808*616b8fbbSStefano Zampini     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1809*616b8fbbSStefano Zampini     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
18105ea7661aSPierre Jolivet   }
1811*616b8fbbSStefano Zampini   c = (Mat_MPIDense*)a->cmat->data;
1812*616b8fbbSStefano Zampini   if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1813*616b8fbbSStefano Zampini   ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
1814*616b8fbbSStefano Zampini   if (setup) { /* do we really need this? */
1815*616b8fbbSStefano Zampini     ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
1816*616b8fbbSStefano Zampini   }
1817*616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1818*616b8fbbSStefano Zampini   a->cmat->assembled = PETSC_TRUE;
18195ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
18205ea7661aSPierre Jolivet   *v = a->cmat;
18215ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18225ea7661aSPierre Jolivet }
18235ea7661aSPierre Jolivet 
18245ea7661aSPierre Jolivet PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
18255ea7661aSPierre Jolivet {
18265ea7661aSPierre Jolivet   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1827*616b8fbbSStefano Zampini   Mat_MPIDense   *c;
18285ea7661aSPierre Jolivet   PetscErrorCode ierr;
18295ea7661aSPierre Jolivet 
18305ea7661aSPierre Jolivet   PetscFunctionBegin;
1831*616b8fbbSStefano Zampini   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1832*616b8fbbSStefano Zampini   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1833*616b8fbbSStefano Zampini   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
18345ea7661aSPierre Jolivet   a->matinuse = 0;
1835*616b8fbbSStefano Zampini   c    = (Mat_MPIDense*)a->cmat->data;
1836*616b8fbbSStefano Zampini   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
18375ea7661aSPierre Jolivet   *v   = NULL;
18385ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18395ea7661aSPierre Jolivet }
18405ea7661aSPierre Jolivet 
18418cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1842273d9f13SBarry Smith {
1843273d9f13SBarry Smith   Mat_MPIDense   *a;
1844dfbe8321SBarry Smith   PetscErrorCode ierr;
1845273d9f13SBarry Smith 
1846273d9f13SBarry Smith   PetscFunctionBegin;
1847b00a9115SJed Brown   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1848b0a32e0cSBarry Smith   mat->data = (void*)a;
1849273d9f13SBarry Smith   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1850273d9f13SBarry Smith 
1851273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1852273d9f13SBarry Smith 
1853273d9f13SBarry Smith   /* build cache for off array entries formed */
1854273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
18552205254eSKarl Rupp 
1856ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1857273d9f13SBarry Smith 
1858273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1859273d9f13SBarry Smith   a->lvec        = 0;
1860273d9f13SBarry Smith   a->Mvctx       = 0;
1861273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1862273d9f13SBarry Smith 
186349a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1864ad16ce7aSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
1865bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
18668572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
18678572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
18688572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
18696947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
18706947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
1871d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1872d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1873d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
18746947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
18756947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
18766947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
18776947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
18786947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
18796947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
18805ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
18815ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
18828baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
18838baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
18848baccfbdSHong Zhang #endif
1885637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1886637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
1887637a0070SStefano Zampini #endif
1888bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
18894222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
18904222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
18916718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
18926718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
18936718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
18946718818eSStefano Zampini #endif
18958949adfdSHong Zhang 
1896af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1897af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
189838aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1899273d9f13SBarry Smith   PetscFunctionReturn(0);
1900273d9f13SBarry Smith }
1901273d9f13SBarry Smith 
1902209238afSKris Buschelman /*MC
1903637a0070SStefano Zampini    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1904637a0070SStefano Zampini 
1905637a0070SStefano Zampini    Options Database Keys:
1906637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1907637a0070SStefano Zampini 
1908637a0070SStefano Zampini   Level: beginner
1909637a0070SStefano Zampini 
1910637a0070SStefano Zampini .seealso:
1911637a0070SStefano Zampini 
1912637a0070SStefano Zampini M*/
1913637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1914637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1915637a0070SStefano Zampini {
1916637a0070SStefano Zampini   PetscErrorCode ierr;
1917637a0070SStefano Zampini 
1918637a0070SStefano Zampini   PetscFunctionBegin;
1919637a0070SStefano Zampini   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
1920637a0070SStefano Zampini   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
1921637a0070SStefano Zampini   PetscFunctionReturn(0);
1922637a0070SStefano Zampini }
1923637a0070SStefano Zampini #endif
1924637a0070SStefano Zampini 
1925637a0070SStefano Zampini /*MC
1926002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1927209238afSKris Buschelman 
1928209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1929209238afSKris Buschelman    and MATMPIDENSE otherwise.
1930209238afSKris Buschelman 
1931209238afSKris Buschelman    Options Database Keys:
1932209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1933209238afSKris Buschelman 
1934209238afSKris Buschelman   Level: beginner
1935209238afSKris Buschelman 
193601b82886SBarry Smith 
19376947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
19386947451fSStefano Zampini M*/
19396947451fSStefano Zampini 
19406947451fSStefano Zampini /*MC
19416947451fSStefano Zampini    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
19426947451fSStefano Zampini 
19436947451fSStefano Zampini    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
19446947451fSStefano Zampini    and MATMPIDENSECUDA otherwise.
19456947451fSStefano Zampini 
19466947451fSStefano Zampini    Options Database Keys:
19476947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
19486947451fSStefano Zampini 
19496947451fSStefano Zampini   Level: beginner
19506947451fSStefano Zampini 
19516947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1952209238afSKris Buschelman M*/
1953209238afSKris Buschelman 
1954273d9f13SBarry Smith /*@C
1955273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1956273d9f13SBarry Smith 
1957273d9f13SBarry Smith    Not collective
1958273d9f13SBarry Smith 
1959273d9f13SBarry Smith    Input Parameters:
19601c4f3114SJed Brown .  B - the matrix
19610298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1962273d9f13SBarry Smith    to control all matrix memory allocation.
1963273d9f13SBarry Smith 
1964273d9f13SBarry Smith    Notes:
1965273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1966273d9f13SBarry Smith    storage by columns.
1967273d9f13SBarry Smith 
1968273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1969273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
19700298fd71SBarry Smith    set data=NULL.
1971273d9f13SBarry Smith 
1972273d9f13SBarry Smith    Level: intermediate
1973273d9f13SBarry Smith 
1974273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1975273d9f13SBarry Smith @*/
19761c4f3114SJed Brown PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1977273d9f13SBarry Smith {
19784ac538c5SBarry Smith   PetscErrorCode ierr;
1979273d9f13SBarry Smith 
1980273d9f13SBarry Smith   PetscFunctionBegin;
1981d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
19821c4f3114SJed Brown   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1983273d9f13SBarry Smith   PetscFunctionReturn(0);
1984273d9f13SBarry Smith }
1985273d9f13SBarry Smith 
1986d3042a70SBarry Smith /*@
1987637a0070SStefano Zampini    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1988d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1989d3042a70SBarry Smith    into a matrix
1990d3042a70SBarry Smith 
1991d3042a70SBarry Smith    Not Collective
1992d3042a70SBarry Smith 
1993d3042a70SBarry Smith    Input Parameters:
1994d3042a70SBarry Smith +  mat - the matrix
1995d3042a70SBarry Smith -  array - the array in column major order
1996d3042a70SBarry Smith 
1997d3042a70SBarry Smith    Notes:
1998d3042a70SBarry Smith    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1999d3042a70SBarry Smith    freed when the matrix is destroyed.
2000d3042a70SBarry Smith 
2001d3042a70SBarry Smith    Level: developer
2002d3042a70SBarry Smith 
2003d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2004d3042a70SBarry Smith 
2005d3042a70SBarry Smith @*/
2006637a0070SStefano Zampini PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2007d3042a70SBarry Smith {
2008d3042a70SBarry Smith   PetscErrorCode ierr;
2009637a0070SStefano Zampini 
2010d3042a70SBarry Smith   PetscFunctionBegin;
2011d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2012d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2013d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2014637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2015637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2016637a0070SStefano Zampini #endif
2017d3042a70SBarry Smith   PetscFunctionReturn(0);
2018d3042a70SBarry Smith }
2019d3042a70SBarry Smith 
2020d3042a70SBarry Smith /*@
2021d3042a70SBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2022d3042a70SBarry Smith 
2023d3042a70SBarry Smith    Not Collective
2024d3042a70SBarry Smith 
2025d3042a70SBarry Smith    Input Parameters:
2026d3042a70SBarry Smith .  mat - the matrix
2027d3042a70SBarry Smith 
2028d3042a70SBarry Smith    Notes:
2029d3042a70SBarry Smith    You can only call this after a call to MatDensePlaceArray()
2030d3042a70SBarry Smith 
2031d3042a70SBarry Smith    Level: developer
2032d3042a70SBarry Smith 
2033d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2034d3042a70SBarry Smith 
2035d3042a70SBarry Smith @*/
2036d3042a70SBarry Smith PetscErrorCode  MatDenseResetArray(Mat mat)
2037d3042a70SBarry Smith {
2038d3042a70SBarry Smith   PetscErrorCode ierr;
2039637a0070SStefano Zampini 
2040d3042a70SBarry Smith   PetscFunctionBegin;
2041d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2042d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2043d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2044d3042a70SBarry Smith   PetscFunctionReturn(0);
2045d3042a70SBarry Smith }
2046d3042a70SBarry Smith 
2047d5ea218eSStefano Zampini /*@
2048d5ea218eSStefano Zampini    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2049d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2050d5ea218eSStefano Zampini    into a matrix
2051d5ea218eSStefano Zampini 
2052d5ea218eSStefano Zampini    Not Collective
2053d5ea218eSStefano Zampini 
2054d5ea218eSStefano Zampini    Input Parameters:
2055d5ea218eSStefano Zampini +  mat - the matrix
2056d5ea218eSStefano Zampini -  array - the array in column major order
2057d5ea218eSStefano Zampini 
2058d5ea218eSStefano Zampini    Notes:
2059d5ea218eSStefano Zampini    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2060d5ea218eSStefano Zampini    freed by the user. It will be freed when the matrix is destroyed.
2061d5ea218eSStefano Zampini 
2062d5ea218eSStefano Zampini    Level: developer
2063d5ea218eSStefano Zampini 
2064d5ea218eSStefano Zampini .seealso: MatDenseGetArray(), VecReplaceArray()
2065d5ea218eSStefano Zampini @*/
2066d5ea218eSStefano Zampini PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2067d5ea218eSStefano Zampini {
2068d5ea218eSStefano Zampini   PetscErrorCode ierr;
2069d5ea218eSStefano Zampini 
2070d5ea218eSStefano Zampini   PetscFunctionBegin;
2071d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2072d5ea218eSStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2073d5ea218eSStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2074d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
2075d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2076d5ea218eSStefano Zampini #endif
2077d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2078d5ea218eSStefano Zampini }
2079d5ea218eSStefano Zampini 
2080637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
20818965ea79SLois Curfman McInnes /*@C
2082637a0070SStefano Zampini    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2083637a0070SStefano Zampini    array provided by the user. This is useful to avoid copying an array
2084637a0070SStefano Zampini    into a matrix
2085637a0070SStefano Zampini 
2086637a0070SStefano Zampini    Not Collective
2087637a0070SStefano Zampini 
2088637a0070SStefano Zampini    Input Parameters:
2089637a0070SStefano Zampini +  mat - the matrix
2090637a0070SStefano Zampini -  array - the array in column major order
2091637a0070SStefano Zampini 
2092637a0070SStefano Zampini    Notes:
2093637a0070SStefano Zampini    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2094637a0070SStefano Zampini    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2095637a0070SStefano Zampini 
2096637a0070SStefano Zampini    Level: developer
2097637a0070SStefano Zampini 
2098637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2099637a0070SStefano Zampini @*/
2100637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2101637a0070SStefano Zampini {
2102637a0070SStefano Zampini   PetscErrorCode ierr;
2103637a0070SStefano Zampini 
2104637a0070SStefano Zampini   PetscFunctionBegin;
2105d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2106637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2107637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2108637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2109637a0070SStefano Zampini   PetscFunctionReturn(0);
2110637a0070SStefano Zampini }
2111637a0070SStefano Zampini 
2112637a0070SStefano Zampini /*@C
2113637a0070SStefano Zampini    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2114637a0070SStefano Zampini 
2115637a0070SStefano Zampini    Not Collective
2116637a0070SStefano Zampini 
2117637a0070SStefano Zampini    Input Parameters:
2118637a0070SStefano Zampini .  mat - the matrix
2119637a0070SStefano Zampini 
2120637a0070SStefano Zampini    Notes:
2121637a0070SStefano Zampini    You can only call this after a call to MatDenseCUDAPlaceArray()
2122637a0070SStefano Zampini 
2123637a0070SStefano Zampini    Level: developer
2124637a0070SStefano Zampini 
2125637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2126637a0070SStefano Zampini 
2127637a0070SStefano Zampini @*/
2128637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2129637a0070SStefano Zampini {
2130637a0070SStefano Zampini   PetscErrorCode ierr;
2131637a0070SStefano Zampini 
2132637a0070SStefano Zampini   PetscFunctionBegin;
2133d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2134637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2135637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2136637a0070SStefano Zampini   PetscFunctionReturn(0);
2137637a0070SStefano Zampini }
2138637a0070SStefano Zampini 
2139637a0070SStefano Zampini /*@C
2140d5ea218eSStefano Zampini    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2141d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2142d5ea218eSStefano Zampini    into a matrix
2143d5ea218eSStefano Zampini 
2144d5ea218eSStefano Zampini    Not Collective
2145d5ea218eSStefano Zampini 
2146d5ea218eSStefano Zampini    Input Parameters:
2147d5ea218eSStefano Zampini +  mat - the matrix
2148d5ea218eSStefano Zampini -  array - the array in column major order
2149d5ea218eSStefano Zampini 
2150d5ea218eSStefano Zampini    Notes:
2151d5ea218eSStefano Zampini    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2152d5ea218eSStefano Zampini    The memory passed in CANNOT be freed by the user. It will be freed
2153d5ea218eSStefano Zampini    when the matrix is destroyed. The array should respect the matrix leading dimension.
2154d5ea218eSStefano Zampini 
2155d5ea218eSStefano Zampini    Level: developer
2156d5ea218eSStefano Zampini 
2157d5ea218eSStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2158d5ea218eSStefano Zampini @*/
2159d5ea218eSStefano Zampini PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2160d5ea218eSStefano Zampini {
2161d5ea218eSStefano Zampini   PetscErrorCode ierr;
2162d5ea218eSStefano Zampini 
2163d5ea218eSStefano Zampini   PetscFunctionBegin;
2164d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2165d5ea218eSStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2166d5ea218eSStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2167d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2168d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2169d5ea218eSStefano Zampini }
2170d5ea218eSStefano Zampini 
2171d5ea218eSStefano Zampini /*@C
2172637a0070SStefano Zampini    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2173637a0070SStefano Zampini 
2174637a0070SStefano Zampini    Not Collective
2175637a0070SStefano Zampini 
2176637a0070SStefano Zampini    Input Parameters:
2177637a0070SStefano Zampini .  A - the matrix
2178637a0070SStefano Zampini 
2179637a0070SStefano Zampini    Output Parameters
2180637a0070SStefano Zampini .  array - the GPU array in column major order
2181637a0070SStefano Zampini 
2182637a0070SStefano Zampini    Notes:
2183637a0070SStefano Zampini    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2184637a0070SStefano Zampini 
2185637a0070SStefano Zampini    Level: developer
2186637a0070SStefano Zampini 
2187637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2188637a0070SStefano Zampini @*/
2189637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2190637a0070SStefano Zampini {
2191637a0070SStefano Zampini   PetscErrorCode ierr;
2192637a0070SStefano Zampini 
2193637a0070SStefano Zampini   PetscFunctionBegin;
2194d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2195637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2196637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2197637a0070SStefano Zampini   PetscFunctionReturn(0);
2198637a0070SStefano Zampini }
2199637a0070SStefano Zampini 
2200637a0070SStefano Zampini /*@C
2201637a0070SStefano Zampini    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2202637a0070SStefano Zampini 
2203637a0070SStefano Zampini    Not Collective
2204637a0070SStefano Zampini 
2205637a0070SStefano Zampini    Input Parameters:
2206637a0070SStefano Zampini +  A - the matrix
2207637a0070SStefano Zampini -  array - the GPU array in column major order
2208637a0070SStefano Zampini 
2209637a0070SStefano Zampini    Notes:
2210637a0070SStefano Zampini 
2211637a0070SStefano Zampini    Level: developer
2212637a0070SStefano Zampini 
2213637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2214637a0070SStefano Zampini @*/
2215637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2216637a0070SStefano Zampini {
2217637a0070SStefano Zampini   PetscErrorCode ierr;
2218637a0070SStefano Zampini 
2219637a0070SStefano Zampini   PetscFunctionBegin;
2220d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2221637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2222637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2223637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2224637a0070SStefano Zampini   PetscFunctionReturn(0);
2225637a0070SStefano Zampini }
2226637a0070SStefano Zampini 
2227637a0070SStefano Zampini /*@C
2228637a0070SStefano Zampini    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2229637a0070SStefano Zampini 
2230637a0070SStefano Zampini    Not Collective
2231637a0070SStefano Zampini 
2232637a0070SStefano Zampini    Input Parameters:
2233637a0070SStefano Zampini .  A - the matrix
2234637a0070SStefano Zampini 
2235637a0070SStefano Zampini    Output Parameters
2236637a0070SStefano Zampini .  array - the GPU array in column major order
2237637a0070SStefano Zampini 
2238637a0070SStefano Zampini    Notes:
2239637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2240637a0070SStefano Zampini 
2241637a0070SStefano Zampini    Level: developer
2242637a0070SStefano Zampini 
2243637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2244637a0070SStefano Zampini @*/
2245637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2246637a0070SStefano Zampini {
2247637a0070SStefano Zampini   PetscErrorCode ierr;
2248637a0070SStefano Zampini 
2249637a0070SStefano Zampini   PetscFunctionBegin;
2250d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2251637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2252637a0070SStefano Zampini   PetscFunctionReturn(0);
2253637a0070SStefano Zampini }
2254637a0070SStefano Zampini 
2255637a0070SStefano Zampini /*@C
2256637a0070SStefano Zampini    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2257637a0070SStefano Zampini 
2258637a0070SStefano Zampini    Not Collective
2259637a0070SStefano Zampini 
2260637a0070SStefano Zampini    Input Parameters:
2261637a0070SStefano Zampini +  A - the matrix
2262637a0070SStefano Zampini -  array - the GPU array in column major order
2263637a0070SStefano Zampini 
2264637a0070SStefano Zampini    Notes:
2265637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2266637a0070SStefano Zampini 
2267637a0070SStefano Zampini    Level: developer
2268637a0070SStefano Zampini 
2269637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2270637a0070SStefano Zampini @*/
2271637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2272637a0070SStefano Zampini {
2273637a0070SStefano Zampini   PetscErrorCode ierr;
2274637a0070SStefano Zampini 
2275637a0070SStefano Zampini   PetscFunctionBegin;
2276637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2277637a0070SStefano Zampini   PetscFunctionReturn(0);
2278637a0070SStefano Zampini }
2279637a0070SStefano Zampini 
2280637a0070SStefano Zampini /*@C
2281637a0070SStefano Zampini    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2282637a0070SStefano Zampini 
2283637a0070SStefano Zampini    Not Collective
2284637a0070SStefano Zampini 
2285637a0070SStefano Zampini    Input Parameters:
2286637a0070SStefano Zampini .  A - the matrix
2287637a0070SStefano Zampini 
2288637a0070SStefano Zampini    Output Parameters
2289637a0070SStefano Zampini .  array - the GPU array in column major order
2290637a0070SStefano Zampini 
2291637a0070SStefano Zampini    Notes:
2292637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2293637a0070SStefano Zampini 
2294637a0070SStefano Zampini    Level: developer
2295637a0070SStefano Zampini 
2296637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2297637a0070SStefano Zampini @*/
2298637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2299637a0070SStefano Zampini {
2300637a0070SStefano Zampini   PetscErrorCode ierr;
2301637a0070SStefano Zampini 
2302637a0070SStefano Zampini   PetscFunctionBegin;
2303d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2304637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2305637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2306637a0070SStefano Zampini   PetscFunctionReturn(0);
2307637a0070SStefano Zampini }
2308637a0070SStefano Zampini 
2309637a0070SStefano Zampini /*@C
2310637a0070SStefano Zampini    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2311637a0070SStefano Zampini 
2312637a0070SStefano Zampini    Not Collective
2313637a0070SStefano Zampini 
2314637a0070SStefano Zampini    Input Parameters:
2315637a0070SStefano Zampini +  A - the matrix
2316637a0070SStefano Zampini -  array - the GPU array in column major order
2317637a0070SStefano Zampini 
2318637a0070SStefano Zampini    Notes:
2319637a0070SStefano Zampini 
2320637a0070SStefano Zampini    Level: developer
2321637a0070SStefano Zampini 
2322637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2323637a0070SStefano Zampini @*/
2324637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2325637a0070SStefano Zampini {
2326637a0070SStefano Zampini   PetscErrorCode ierr;
2327637a0070SStefano Zampini 
2328637a0070SStefano Zampini   PetscFunctionBegin;
2329d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2330637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2331637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2332637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2333637a0070SStefano Zampini   PetscFunctionReturn(0);
2334637a0070SStefano Zampini }
2335637a0070SStefano Zampini #endif
2336637a0070SStefano Zampini 
2337637a0070SStefano Zampini /*@C
2338637a0070SStefano Zampini    MatCreateDense - Creates a matrix in dense format.
23398965ea79SLois Curfman McInnes 
2340d083f849SBarry Smith    Collective
2341db81eaa0SLois Curfman McInnes 
23428965ea79SLois Curfman McInnes    Input Parameters:
2343db81eaa0SLois Curfman McInnes +  comm - MPI communicator
23448965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2345db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
23468965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2347db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
23486cfe35ddSJose E. Roman -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2349dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
23508965ea79SLois Curfman McInnes 
23518965ea79SLois Curfman McInnes    Output Parameter:
2352477f1c0bSLois Curfman McInnes .  A - the matrix
23538965ea79SLois Curfman McInnes 
2354b259b22eSLois Curfman McInnes    Notes:
235539ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
235639ddd567SLois Curfman McInnes    storage by columns.
23578965ea79SLois Curfman McInnes 
235818f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
235918f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
23606cfe35ddSJose E. Roman    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
236118f449edSLois Curfman McInnes 
23628965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
23638965ea79SLois Curfman McInnes    (possibly both).
23648965ea79SLois Curfman McInnes 
2365027ccd11SLois Curfman McInnes    Level: intermediate
2366027ccd11SLois Curfman McInnes 
236739ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
23688965ea79SLois Curfman McInnes @*/
236969b1f4b7SBarry Smith PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
23708965ea79SLois Curfman McInnes {
23716849ba73SBarry Smith   PetscErrorCode ierr;
237213f74950SBarry Smith   PetscMPIInt    size;
23738965ea79SLois Curfman McInnes 
23743a40ed3dSBarry Smith   PetscFunctionBegin;
2375f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
23768491ab44SLisandro Dalcin   PetscValidLogicalCollectiveBool(*A,!!data,6);
2377f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2378273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2379273d9f13SBarry Smith   if (size > 1) {
2380273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2381273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
23826cfe35ddSJose E. Roman     if (data) {  /* user provided data array, so no need to assemble */
23836cfe35ddSJose E. Roman       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
23846cfe35ddSJose E. Roman       (*A)->assembled = PETSC_TRUE;
23856cfe35ddSJose E. Roman     }
2386273d9f13SBarry Smith   } else {
2387273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2388273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
23898c469469SLois Curfman McInnes   }
23903a40ed3dSBarry Smith   PetscFunctionReturn(0);
23918965ea79SLois Curfman McInnes }
23928965ea79SLois Curfman McInnes 
2393637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2394637a0070SStefano Zampini /*@C
2395637a0070SStefano Zampini    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2396637a0070SStefano Zampini 
2397637a0070SStefano Zampini    Collective
2398637a0070SStefano Zampini 
2399637a0070SStefano Zampini    Input Parameters:
2400637a0070SStefano Zampini +  comm - MPI communicator
2401637a0070SStefano Zampini .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2402637a0070SStefano Zampini .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2403637a0070SStefano Zampini .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2404637a0070SStefano Zampini .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2405637a0070SStefano Zampini -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2406637a0070SStefano Zampini    to control matrix memory allocation.
2407637a0070SStefano Zampini 
2408637a0070SStefano Zampini    Output Parameter:
2409637a0070SStefano Zampini .  A - the matrix
2410637a0070SStefano Zampini 
2411637a0070SStefano Zampini    Notes:
2412637a0070SStefano Zampini 
2413637a0070SStefano Zampini    Level: intermediate
2414637a0070SStefano Zampini 
2415637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense()
2416637a0070SStefano Zampini @*/
2417637a0070SStefano Zampini PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2418637a0070SStefano Zampini {
2419637a0070SStefano Zampini   PetscErrorCode ierr;
2420637a0070SStefano Zampini   PetscMPIInt    size;
2421637a0070SStefano Zampini 
2422637a0070SStefano Zampini   PetscFunctionBegin;
2423637a0070SStefano Zampini   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2424637a0070SStefano Zampini   PetscValidLogicalCollectiveBool(*A,!!data,6);
2425637a0070SStefano Zampini   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2426637a0070SStefano Zampini   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2427637a0070SStefano Zampini   if (size > 1) {
2428637a0070SStefano Zampini     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2429637a0070SStefano Zampini     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2430637a0070SStefano Zampini     if (data) {  /* user provided data array, so no need to assemble */
2431637a0070SStefano Zampini       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2432637a0070SStefano Zampini       (*A)->assembled = PETSC_TRUE;
2433637a0070SStefano Zampini     }
2434637a0070SStefano Zampini   } else {
2435637a0070SStefano Zampini     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2436637a0070SStefano Zampini     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2437637a0070SStefano Zampini   }
2438637a0070SStefano Zampini   PetscFunctionReturn(0);
2439637a0070SStefano Zampini }
2440637a0070SStefano Zampini #endif
2441637a0070SStefano Zampini 
24426849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
24438965ea79SLois Curfman McInnes {
24448965ea79SLois Curfman McInnes   Mat            mat;
24453501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2446dfbe8321SBarry Smith   PetscErrorCode ierr;
24478965ea79SLois Curfman McInnes 
24483a40ed3dSBarry Smith   PetscFunctionBegin;
24498965ea79SLois Curfman McInnes   *newmat = 0;
2450ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2451d0f46423SBarry Smith   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
24527adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2453834f8fabSBarry Smith   a       = (Mat_MPIDense*)mat->data;
24545aa7edbeSHong Zhang 
2455d5f3da31SBarry Smith   mat->factortype   = A->factortype;
2456c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
2457273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24588965ea79SLois Curfman McInnes 
2459e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
24603782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
2461e04c1aa4SHong Zhang 
24621e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
24631e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
24648965ea79SLois Curfman McInnes 
24655609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
24663bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2467637a0070SStefano Zampini   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
246801b82886SBarry Smith 
24698965ea79SLois Curfman McInnes   *newmat = mat;
24703a40ed3dSBarry Smith   PetscFunctionReturn(0);
24718965ea79SLois Curfman McInnes }
24728965ea79SLois Curfman McInnes 
2473eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2474eb91f321SVaclav Hapla {
2475eb91f321SVaclav Hapla   PetscErrorCode ierr;
247687d5ce66SSatish Balay   PetscBool      isbinary;
247787d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
247887d5ce66SSatish Balay   PetscBool      ishdf5;
247987d5ce66SSatish Balay #endif
2480eb91f321SVaclav Hapla 
2481eb91f321SVaclav Hapla   PetscFunctionBegin;
2482eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2483eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2484eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
2485eb91f321SVaclav Hapla   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2486eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
248787d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
2488eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
248987d5ce66SSatish Balay #endif
2490eb91f321SVaclav Hapla   if (isbinary) {
24918491ab44SLisandro Dalcin     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2492eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
249387d5ce66SSatish Balay   } else if (ishdf5) {
2494eb91f321SVaclav Hapla     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2495eb91f321SVaclav Hapla #endif
249687d5ce66SSatish Balay   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2497eb91f321SVaclav Hapla   PetscFunctionReturn(0);
2498eb91f321SVaclav Hapla }
2499eb91f321SVaclav Hapla 
25006718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
25016e4ee0c6SHong Zhang {
25026e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
25036e4ee0c6SHong Zhang   Mat            a,b;
2504ace3abfcSBarry Smith   PetscBool      flg;
25056e4ee0c6SHong Zhang   PetscErrorCode ierr;
250690ace30eSBarry Smith 
25076e4ee0c6SHong Zhang   PetscFunctionBegin;
25086e4ee0c6SHong Zhang   a    = matA->A;
25096e4ee0c6SHong Zhang   b    = matB->A;
25106e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2511b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
25126e4ee0c6SHong Zhang   PetscFunctionReturn(0);
25136e4ee0c6SHong Zhang }
251490ace30eSBarry Smith 
25156718818eSStefano Zampini PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2516baa3c1c6SHong Zhang {
2517baa3c1c6SHong Zhang   PetscErrorCode        ierr;
25186718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2519baa3c1c6SHong Zhang 
2520baa3c1c6SHong Zhang   PetscFunctionBegin;
2521637a0070SStefano Zampini   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2522637a0070SStefano Zampini   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2523baa3c1c6SHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
2524baa3c1c6SHong Zhang   PetscFunctionReturn(0);
2525baa3c1c6SHong Zhang }
2526baa3c1c6SHong Zhang 
25276718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2528cc48ffa7SToby Isaac {
2529cc48ffa7SToby Isaac   PetscErrorCode        ierr;
25306718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2531cc48ffa7SToby Isaac 
2532cc48ffa7SToby Isaac   PetscFunctionBegin;
2533cc48ffa7SToby Isaac   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2534faa55883SToby Isaac   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2535cc48ffa7SToby Isaac   ierr = PetscFree(abt);CHKERRQ(ierr);
2536cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2537cc48ffa7SToby Isaac }
2538cc48ffa7SToby Isaac 
25396718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2540cb20be35SHong Zhang {
2541baa3c1c6SHong Zhang   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
25426718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2543cb20be35SHong Zhang   PetscErrorCode        ierr;
2544cb20be35SHong Zhang   MPI_Comm              comm;
25456718818eSStefano Zampini   PetscMPIInt           size,*recvcounts;
25466718818eSStefano Zampini   PetscScalar           *carray,*sendbuf;
2547637a0070SStefano Zampini   const PetscScalar     *atbarray;
2548d5017740SHong Zhang   PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2549e68c0b26SHong Zhang   const PetscInt        *ranges;
2550cb20be35SHong Zhang 
2551cb20be35SHong Zhang   PetscFunctionBegin;
25526718818eSStefano Zampini   MatCheckProduct(C,3);
25536718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
25546718818eSStefano Zampini   atb = (Mat_TransMatMultDense *)C->product->data;
25556718818eSStefano Zampini   recvcounts = atb->recvcounts;
25566718818eSStefano Zampini   sendbuf = atb->sendbuf;
25576718818eSStefano Zampini 
2558cb20be35SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2559cb20be35SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2560e68c0b26SHong Zhang 
2561c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
2562637a0070SStefano Zampini   ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);
2563cb20be35SHong Zhang 
2564cb20be35SHong Zhang   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2565cb20be35SHong Zhang 
2566660d5466SHong Zhang   /* arrange atbarray into sendbuf */
2567637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2568637a0070SStefano Zampini   for (proc=0, k=0; proc<size; proc++) {
2569baa3c1c6SHong Zhang     for (j=0; j<cN; j++) {
2570c5ef1628SHong Zhang       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2571cb20be35SHong Zhang     }
2572cb20be35SHong Zhang   }
2573637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2574637a0070SStefano Zampini 
2575c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
25760acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
25773462b7efSHong Zhang   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
25780acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
25790acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25800acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2581cb20be35SHong Zhang   PetscFunctionReturn(0);
2582cb20be35SHong Zhang }
2583cb20be35SHong Zhang 
25846718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2585cb20be35SHong Zhang {
2586cb20be35SHong Zhang   PetscErrorCode        ierr;
2587cb20be35SHong Zhang   MPI_Comm              comm;
2588baa3c1c6SHong Zhang   PetscMPIInt           size;
2589660d5466SHong Zhang   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2590baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
25916718818eSStefano Zampini   PetscBool             cisdense;
25920acaeb71SStefano Zampini   PetscInt              i;
25930acaeb71SStefano Zampini   const PetscInt        *ranges;
2594cb20be35SHong Zhang 
2595cb20be35SHong Zhang   PetscFunctionBegin;
25966718818eSStefano Zampini   MatCheckProduct(C,3);
25976718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2598baa3c1c6SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2599cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2600cb20be35SHong Zhang     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2601cb20be35SHong Zhang   }
2602cb20be35SHong Zhang 
26034222ddf1SHong Zhang   /* create matrix product C */
26046718818eSStefano Zampini   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
26056718818eSStefano Zampini   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
26066718818eSStefano Zampini   if (!cisdense) {
26076718818eSStefano Zampini     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
26086718818eSStefano Zampini   }
260918992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
2610baa3c1c6SHong Zhang 
26114222ddf1SHong Zhang   /* create data structure for reuse C */
2612baa3c1c6SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2613baa3c1c6SHong Zhang   ierr = PetscNew(&atb);CHKERRQ(ierr);
26144222ddf1SHong Zhang   cM   = C->rmap->N;
26156718818eSStefano Zampini   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
26160acaeb71SStefano Zampini   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
26170acaeb71SStefano Zampini   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2618baa3c1c6SHong Zhang 
26196718818eSStefano Zampini   C->product->data    = atb;
26206718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2621cb20be35SHong Zhang   PetscFunctionReturn(0);
2622cb20be35SHong Zhang }
2623cb20be35SHong Zhang 
26244222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2625cb20be35SHong Zhang {
2626cb20be35SHong Zhang   PetscErrorCode        ierr;
2627cc48ffa7SToby Isaac   MPI_Comm              comm;
2628cc48ffa7SToby Isaac   PetscMPIInt           i, size;
2629cc48ffa7SToby Isaac   PetscInt              maxRows, bufsiz;
2630cc48ffa7SToby Isaac   PetscMPIInt           tag;
26314222ddf1SHong Zhang   PetscInt              alg;
2632cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
26334222ddf1SHong Zhang   Mat_Product           *product = C->product;
26344222ddf1SHong Zhang   PetscBool             flg;
2635cc48ffa7SToby Isaac 
2636cc48ffa7SToby Isaac   PetscFunctionBegin;
26376718818eSStefano Zampini   MatCheckProduct(C,4);
26386718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
26394222ddf1SHong Zhang   /* check local size of A and B */
2640637a0070SStefano Zampini   if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2641cc48ffa7SToby Isaac 
26424222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
2643637a0070SStefano Zampini   alg  = flg ? 0 : 1;
2644cc48ffa7SToby Isaac 
26454222ddf1SHong Zhang   /* setup matrix product C */
26464222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
26474222ddf1SHong Zhang   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
264818992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
26494222ddf1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
26504222ddf1SHong Zhang 
26514222ddf1SHong Zhang   /* create data structure for reuse C */
26524222ddf1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2653cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2654cc48ffa7SToby Isaac   ierr = PetscNew(&abt);CHKERRQ(ierr);
2655cc48ffa7SToby Isaac   abt->tag = tag;
2656faa55883SToby Isaac   abt->alg = alg;
2657faa55883SToby Isaac   switch (alg) {
26584222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2659cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2660cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
2661cc48ffa7SToby Isaac     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2662faa55883SToby Isaac     break;
26634222ddf1SHong Zhang   default: /* alg: "allgatherv" */
2664faa55883SToby Isaac     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2665faa55883SToby Isaac     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2666faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2667faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2668faa55883SToby Isaac     break;
2669faa55883SToby Isaac   }
2670cc48ffa7SToby Isaac 
26716718818eSStefano Zampini   C->product->data    = abt;
26726718818eSStefano Zampini   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
26734222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2674cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2675cc48ffa7SToby Isaac }
2676cc48ffa7SToby Isaac 
2677faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2678cc48ffa7SToby Isaac {
2679cc48ffa7SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
26806718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2681cc48ffa7SToby Isaac   PetscErrorCode        ierr;
2682cc48ffa7SToby Isaac   MPI_Comm              comm;
2683cc48ffa7SToby Isaac   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2684637a0070SStefano Zampini   PetscScalar           *sendbuf, *recvbuf=0, *cv;
2685cc48ffa7SToby Isaac   PetscInt              i,cK=A->cmap->N,k,j,bn;
2686cc48ffa7SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2687637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2688637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, blda, clda;
2689cc48ffa7SToby Isaac   MPI_Request           reqs[2];
2690cc48ffa7SToby Isaac   const PetscInt        *ranges;
2691cc48ffa7SToby Isaac 
2692cc48ffa7SToby Isaac   PetscFunctionBegin;
26936718818eSStefano Zampini   MatCheckProduct(C,3);
26946718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
26956718818eSStefano Zampini   abt  = (Mat_MatTransMultDense*)C->product->data;
26966718818eSStefano Zampini   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2697cc48ffa7SToby Isaac   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2698cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2699637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2700637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
27010acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2702637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2703637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2704637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
2705637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
2706637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2707637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2708cc48ffa7SToby Isaac   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2709cc48ffa7SToby Isaac   bn   = B->rmap->n;
2710637a0070SStefano Zampini   if (blda == bn) {
2711637a0070SStefano Zampini     sendbuf = (PetscScalar*)bv;
2712cc48ffa7SToby Isaac   } else {
2713cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2714cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2715cc48ffa7SToby Isaac       for (j = 0; j < bn; j++, k++) {
2716637a0070SStefano Zampini         sendbuf[k] = bv[i * blda + j];
2717cc48ffa7SToby Isaac       }
2718cc48ffa7SToby Isaac     }
2719cc48ffa7SToby Isaac   }
2720cc48ffa7SToby Isaac   if (size > 1) {
2721cc48ffa7SToby Isaac     sendto = (rank + size - 1) % size;
2722cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2723cc48ffa7SToby Isaac   } else {
2724cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2725cc48ffa7SToby Isaac   }
2726cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2727cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2728cc48ffa7SToby Isaac   recvisfrom = rank;
2729cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2730cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2731cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2732cc48ffa7SToby Isaac     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2733cc48ffa7SToby Isaac 
2734cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2735cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2736cc48ffa7SToby Isaac       sendsiz = cK * bn;
2737cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2738cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2739cc48ffa7SToby Isaac       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2740cc48ffa7SToby Isaac       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2741cc48ffa7SToby Isaac     }
2742cc48ffa7SToby Isaac 
2743cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
2744cc48ffa7SToby Isaac     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
274550ce3c9cSStefano Zampini     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2746cc48ffa7SToby Isaac 
2747cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2748cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2749cc48ffa7SToby Isaac       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2750cc48ffa7SToby Isaac     }
2751cc48ffa7SToby Isaac     bn = nextbn;
2752cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2753cc48ffa7SToby Isaac     sendbuf = recvbuf;
2754cc48ffa7SToby Isaac   }
2755637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2756637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
27570acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
27580acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27590acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2761cc48ffa7SToby Isaac }
2762cc48ffa7SToby Isaac 
2763faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2764faa55883SToby Isaac {
2765faa55883SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
27666718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2767faa55883SToby Isaac   PetscErrorCode        ierr;
2768faa55883SToby Isaac   MPI_Comm              comm;
2769637a0070SStefano Zampini   PetscMPIInt           size;
2770637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2771637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2772637a0070SStefano Zampini   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2773faa55883SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2774637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, clda;
2775faa55883SToby Isaac 
2776faa55883SToby Isaac   PetscFunctionBegin;
27776718818eSStefano Zampini   MatCheckProduct(C,3);
27786718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
27796718818eSStefano Zampini   abt  = (Mat_MatTransMultDense*)C->product->data;
2780faa55883SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2781faa55883SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2782637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2783637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
27840acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2785637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2786637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2787637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2788637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2789637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2790faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2791faa55883SToby Isaac   bn      = B->rmap->n;
2792faa55883SToby Isaac   sendbuf = abt->buf[0];
2793faa55883SToby Isaac   recvbuf = abt->buf[1];
2794faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2795faa55883SToby Isaac     for (i = 0; i < cK; i++, k++) {
2796637a0070SStefano Zampini       sendbuf[k] = bv[i * blda + j];
2797faa55883SToby Isaac     }
2798faa55883SToby Isaac   }
2799637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2800faa55883SToby Isaac   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2801faa55883SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2802faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2803faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
280450ce3c9cSStefano Zampini   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2805637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2806637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
28070acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
28080acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28090acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2810faa55883SToby Isaac   PetscFunctionReturn(0);
2811faa55883SToby Isaac }
2812faa55883SToby Isaac 
2813faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2814faa55883SToby Isaac {
28156718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2816faa55883SToby Isaac   PetscErrorCode        ierr;
2817faa55883SToby Isaac 
2818faa55883SToby Isaac   PetscFunctionBegin;
28196718818eSStefano Zampini   MatCheckProduct(C,3);
28206718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
28216718818eSStefano Zampini   abt = (Mat_MatTransMultDense*)C->product->data;
2822faa55883SToby Isaac   switch (abt->alg) {
2823faa55883SToby Isaac   case 1:
2824faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2825faa55883SToby Isaac     break;
2826faa55883SToby Isaac   default:
2827faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2828faa55883SToby Isaac     break;
2829faa55883SToby Isaac   }
2830faa55883SToby Isaac   PetscFunctionReturn(0);
2831faa55883SToby Isaac }
2832faa55883SToby Isaac 
28336718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2834320f2790SHong Zhang {
2835320f2790SHong Zhang   PetscErrorCode   ierr;
28366718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2837320f2790SHong Zhang 
2838320f2790SHong Zhang   PetscFunctionBegin;
2839320f2790SHong Zhang   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2840320f2790SHong Zhang   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2841320f2790SHong Zhang   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2842320f2790SHong Zhang   ierr = PetscFree(ab);CHKERRQ(ierr);
2843320f2790SHong Zhang   PetscFunctionReturn(0);
2844320f2790SHong Zhang }
2845320f2790SHong Zhang 
28465242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
2847320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2848320f2790SHong Zhang {
2849320f2790SHong Zhang   PetscErrorCode   ierr;
28506718818eSStefano Zampini   Mat_MatMultDense *ab;
2851320f2790SHong Zhang 
2852320f2790SHong Zhang   PetscFunctionBegin;
28536718818eSStefano Zampini   MatCheckProduct(C,3);
28546718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
28556718818eSStefano Zampini   ab   = (Mat_MatMultDense*)C->product->data;
2856de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2857de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
28584222ddf1SHong Zhang   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2859de0a22f0SHong Zhang   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2860320f2790SHong Zhang   PetscFunctionReturn(0);
2861320f2790SHong Zhang }
2862320f2790SHong Zhang 
28636718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2864320f2790SHong Zhang {
2865320f2790SHong Zhang   PetscErrorCode   ierr;
2866320f2790SHong Zhang   Mat              Ae,Be,Ce;
2867320f2790SHong Zhang   Mat_MatMultDense *ab;
2868320f2790SHong Zhang 
2869320f2790SHong Zhang   PetscFunctionBegin;
28706718818eSStefano Zampini   MatCheckProduct(C,4);
28716718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
28724222ddf1SHong Zhang   /* check local size of A and B */
2873320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2874378336b6SHong Zhang     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2875320f2790SHong Zhang   }
2876320f2790SHong Zhang 
28774222ddf1SHong Zhang   /* create elemental matrices Ae and Be */
28784222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
28794222ddf1SHong Zhang   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
28804222ddf1SHong Zhang   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
28814222ddf1SHong Zhang   ierr = MatSetUp(Ae);CHKERRQ(ierr);
28824222ddf1SHong Zhang   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2883320f2790SHong Zhang 
28844222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
28854222ddf1SHong Zhang   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
28864222ddf1SHong Zhang   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
28874222ddf1SHong Zhang   ierr = MatSetUp(Be);CHKERRQ(ierr);
28884222ddf1SHong Zhang   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2889320f2790SHong Zhang 
28904222ddf1SHong Zhang   /* compute symbolic Ce = Ae*Be */
28914222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
28924222ddf1SHong Zhang   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
28934222ddf1SHong Zhang 
28944222ddf1SHong Zhang   /* setup C */
28954222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
28964222ddf1SHong Zhang   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
28974222ddf1SHong Zhang   ierr = MatSetUp(C);CHKERRQ(ierr);
2898320f2790SHong Zhang 
2899320f2790SHong Zhang   /* create data structure for reuse Cdense */
2900320f2790SHong Zhang   ierr = PetscNew(&ab);CHKERRQ(ierr);
2901320f2790SHong Zhang   ab->Ae = Ae;
2902320f2790SHong Zhang   ab->Be = Be;
2903320f2790SHong Zhang   ab->Ce = Ce;
29046718818eSStefano Zampini 
29056718818eSStefano Zampini   C->product->data    = ab;
29066718818eSStefano Zampini   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
29074222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2908320f2790SHong Zhang   PetscFunctionReturn(0);
2909320f2790SHong Zhang }
29104222ddf1SHong Zhang #endif
29114222ddf1SHong Zhang /* ----------------------------------------------- */
29124222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
29134222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2914320f2790SHong Zhang {
2915320f2790SHong Zhang   PetscFunctionBegin;
29164222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
29174222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
2918320f2790SHong Zhang   PetscFunctionReturn(0);
2919320f2790SHong Zhang }
29205242a7b1SHong Zhang #endif
292186aefd0dSHong Zhang 
29224222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
29234222ddf1SHong Zhang {
29244222ddf1SHong Zhang   Mat_Product *product = C->product;
29254222ddf1SHong Zhang   Mat         A = product->A,B=product->B;
29264222ddf1SHong Zhang 
29274222ddf1SHong Zhang   PetscFunctionBegin;
29284222ddf1SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
29294222ddf1SHong Zhang     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
29306718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
29316718818eSStefano Zampini   C->ops->productsymbolic = MatProductSymbolic_AtB;
29324222ddf1SHong Zhang   PetscFunctionReturn(0);
29334222ddf1SHong Zhang }
29344222ddf1SHong Zhang 
29354222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
29364222ddf1SHong Zhang {
29374222ddf1SHong Zhang   PetscErrorCode ierr;
29384222ddf1SHong Zhang   Mat_Product    *product = C->product;
29394222ddf1SHong Zhang   const char     *algTypes[2] = {"allgatherv","cyclic"};
29404222ddf1SHong Zhang   PetscInt       alg,nalg = 2;
29414222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
29424222ddf1SHong Zhang 
29434222ddf1SHong Zhang   PetscFunctionBegin;
29444222ddf1SHong Zhang   /* Set default algorithm */
29454222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
29464222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
29474222ddf1SHong Zhang   if (flg) {
29484222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29494222ddf1SHong Zhang   }
29504222ddf1SHong Zhang 
29514222ddf1SHong Zhang   /* Get runtime option */
29524222ddf1SHong Zhang   if (product->api_user) {
29534222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
29544222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29554222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
29564222ddf1SHong Zhang   } else {
29574222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
29584222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29594222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
29604222ddf1SHong Zhang   }
29614222ddf1SHong Zhang   if (flg) {
29624222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29634222ddf1SHong Zhang   }
29644222ddf1SHong Zhang 
29654222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
29664222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
29674222ddf1SHong Zhang   PetscFunctionReturn(0);
29684222ddf1SHong Zhang }
29694222ddf1SHong Zhang 
29704222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
29714222ddf1SHong Zhang {
29724222ddf1SHong Zhang   PetscErrorCode ierr;
29734222ddf1SHong Zhang   Mat_Product    *product = C->product;
29744222ddf1SHong Zhang 
29754222ddf1SHong Zhang   PetscFunctionBegin;
29764222ddf1SHong Zhang   switch (product->type) {
29774222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
29784222ddf1SHong Zhang   case MATPRODUCT_AB:
29794222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
29804222ddf1SHong Zhang     break;
29814222ddf1SHong Zhang #endif
29824222ddf1SHong Zhang   case MATPRODUCT_AtB:
29834222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
29844222ddf1SHong Zhang     break;
29854222ddf1SHong Zhang   case MATPRODUCT_ABt:
29864222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
29874222ddf1SHong Zhang     break;
29886718818eSStefano Zampini   default:
29896718818eSStefano Zampini     break;
29904222ddf1SHong Zhang   }
29914222ddf1SHong Zhang   PetscFunctionReturn(0);
29924222ddf1SHong Zhang }
2993