xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision d24d420451aab1d024a728b511d7b61b40627436)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
8baa3c1c6SHong Zhang #include <petscblaslapack.h>
98965ea79SLois Curfman McInnes 
10ab92ecdeSBarry Smith /*@
11ab92ecdeSBarry Smith 
12ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
14ab92ecdeSBarry Smith 
15ab92ecdeSBarry Smith     Input Parameter:
16ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
17ab92ecdeSBarry Smith 
18ab92ecdeSBarry Smith     Output Parameter:
19ab92ecdeSBarry Smith .      B - the inner matrix
20ab92ecdeSBarry Smith 
218e6c10adSSatish Balay     Level: intermediate
228e6c10adSSatish Balay 
23ab92ecdeSBarry Smith @*/
24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25ab92ecdeSBarry Smith {
26ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27ab92ecdeSBarry Smith   PetscErrorCode ierr;
28ace3abfcSBarry Smith   PetscBool      flg;
29ab92ecdeSBarry Smith 
30ab92ecdeSBarry Smith   PetscFunctionBegin;
31d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32d5ea218eSStefano Zampini   PetscValidPointer(B,2);
33251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
342205254eSKarl Rupp   if (flg) *B = mat->A;
352205254eSKarl Rupp   else *B = A;
36ab92ecdeSBarry Smith   PetscFunctionReturn(0);
37ab92ecdeSBarry Smith }
38ab92ecdeSBarry Smith 
39ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40ba8c8a56SBarry Smith {
41ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42ba8c8a56SBarry Smith   PetscErrorCode ierr;
43d0f46423SBarry Smith   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44ba8c8a56SBarry Smith 
45ba8c8a56SBarry Smith   PetscFunctionBegin;
46e7e72b3dSBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47ba8c8a56SBarry Smith   lrow = row - rstart;
48ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49ba8c8a56SBarry Smith   PetscFunctionReturn(0);
50ba8c8a56SBarry Smith }
51ba8c8a56SBarry Smith 
52637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53ba8c8a56SBarry Smith {
54637a0070SStefano Zampini   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55ba8c8a56SBarry Smith   PetscErrorCode ierr;
56637a0070SStefano Zampini   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57ba8c8a56SBarry Smith 
58ba8c8a56SBarry Smith   PetscFunctionBegin;
59637a0070SStefano Zampini   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60637a0070SStefano Zampini   lrow = row - rstart;
61637a0070SStefano Zampini   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62ba8c8a56SBarry Smith   PetscFunctionReturn(0);
63ba8c8a56SBarry Smith }
64ba8c8a56SBarry Smith 
6511bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
660de54da6SSatish Balay {
670de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
686849ba73SBarry Smith   PetscErrorCode ierr;
69d0f46423SBarry Smith   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
7087828ca2SBarry Smith   PetscScalar    *array;
710de54da6SSatish Balay   MPI_Comm       comm;
724b6ae80fSStefano Zampini   PetscBool      flg;
7311bd1e4dSLisandro Dalcin   Mat            B;
740de54da6SSatish Balay 
750de54da6SSatish Balay   PetscFunctionBegin;
764b6ae80fSStefano Zampini   ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
774b6ae80fSStefano Zampini   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
7811bd1e4dSLisandro Dalcin   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
79616b8fbbSStefano Zampini   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
804b6ae80fSStefano Zampini 
814b6ae80fSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
824b6ae80fSStefano Zampini     if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
830de54da6SSatish Balay     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
8411bd1e4dSLisandro Dalcin     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
8511bd1e4dSLisandro Dalcin     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
8611bd1e4dSLisandro Dalcin     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
874b6ae80fSStefano Zampini     ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
8811bd1e4dSLisandro Dalcin     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
894b6ae80fSStefano Zampini     ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
9011bd1e4dSLisandro Dalcin     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9111bd1e4dSLisandro Dalcin     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9211bd1e4dSLisandro Dalcin     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
9311bd1e4dSLisandro Dalcin     *a   = B;
9411bd1e4dSLisandro Dalcin     ierr = MatDestroy(&B);CHKERRQ(ierr);
952205254eSKarl Rupp   } else *a = B;
960de54da6SSatish Balay   PetscFunctionReturn(0);
970de54da6SSatish Balay }
980de54da6SSatish Balay 
9913f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
1008965ea79SLois Curfman McInnes {
10139b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
102dfbe8321SBarry Smith   PetscErrorCode ierr;
103d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
104ace3abfcSBarry Smith   PetscBool      roworiented = A->roworiented;
1058965ea79SLois Curfman McInnes 
1063a40ed3dSBarry Smith   PetscFunctionBegin;
1078965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1085ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
109e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1108965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1118965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
11239b7565bSBarry Smith       if (roworiented) {
11339b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1143a40ed3dSBarry Smith       } else {
1158965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1165ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
117e32f2f54SBarry Smith           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
11839b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
11939b7565bSBarry Smith         }
1208965ea79SLois Curfman McInnes       }
1212205254eSKarl Rupp     } else if (!A->donotstash) {
1225080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
12339b7565bSBarry Smith       if (roworiented) {
124b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
125d36fbae8SSatish Balay       } else {
126b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
12739b7565bSBarry Smith       }
128b49de8d1SLois Curfman McInnes     }
129b49de8d1SLois Curfman McInnes   }
1303a40ed3dSBarry Smith   PetscFunctionReturn(0);
131b49de8d1SLois Curfman McInnes }
132b49de8d1SLois Curfman McInnes 
13313f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134b49de8d1SLois Curfman McInnes {
135b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136dfbe8321SBarry Smith   PetscErrorCode ierr;
137d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138b49de8d1SLois Curfman McInnes 
1393a40ed3dSBarry Smith   PetscFunctionBegin;
140b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
141e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
144b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
145b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
146e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147e7e72b3dSBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149b49de8d1SLois Curfman McInnes       }
150e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
1518965ea79SLois Curfman McInnes   }
1523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1538965ea79SLois Curfman McInnes }
1548965ea79SLois Curfman McInnes 
15549a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
15649a6ff4bSBarry Smith {
15749a6ff4bSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
15849a6ff4bSBarry Smith   PetscErrorCode ierr;
15949a6ff4bSBarry Smith 
16049a6ff4bSBarry Smith   PetscFunctionBegin;
16149a6ff4bSBarry Smith   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
16249a6ff4bSBarry Smith   PetscFunctionReturn(0);
16349a6ff4bSBarry Smith }
16449a6ff4bSBarry Smith 
165ad16ce7aSStefano Zampini static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166ad16ce7aSStefano Zampini {
167ad16ce7aSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168ad16ce7aSStefano Zampini   PetscErrorCode ierr;
169ad16ce7aSStefano Zampini 
170ad16ce7aSStefano Zampini   PetscFunctionBegin;
171ad16ce7aSStefano Zampini   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172ad16ce7aSStefano Zampini   PetscFunctionReturn(0);
173ad16ce7aSStefano Zampini }
174ad16ce7aSStefano Zampini 
175637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176ff14e315SSatish Balay {
177ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178dfbe8321SBarry Smith   PetscErrorCode ierr;
179ff14e315SSatish Balay 
1803a40ed3dSBarry Smith   PetscFunctionBegin;
181616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1828c778c55SBarry Smith   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
1833a40ed3dSBarry Smith   PetscFunctionReturn(0);
184ff14e315SSatish Balay }
185ff14e315SSatish Balay 
186637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
1878572280aSBarry Smith {
1888572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1898572280aSBarry Smith   PetscErrorCode ierr;
1908572280aSBarry Smith 
1918572280aSBarry Smith   PetscFunctionBegin;
192616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1938572280aSBarry Smith   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
1948572280aSBarry Smith   PetscFunctionReturn(0);
1958572280aSBarry Smith }
1968572280aSBarry Smith 
1976947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
1986947451fSStefano Zampini {
1996947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
2006947451fSStefano Zampini   PetscErrorCode ierr;
2016947451fSStefano Zampini 
2026947451fSStefano Zampini   PetscFunctionBegin;
203616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
2046947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
2056947451fSStefano Zampini   PetscFunctionReturn(0);
2066947451fSStefano Zampini }
2076947451fSStefano Zampini 
208637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
209d3042a70SBarry Smith {
210d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211d3042a70SBarry Smith   PetscErrorCode ierr;
212d3042a70SBarry Smith 
213d3042a70SBarry Smith   PetscFunctionBegin;
214616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
215616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
216d3042a70SBarry Smith   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
217d3042a70SBarry Smith   PetscFunctionReturn(0);
218d3042a70SBarry Smith }
219d3042a70SBarry Smith 
220d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
221d3042a70SBarry Smith {
222d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
223d3042a70SBarry Smith   PetscErrorCode ierr;
224d3042a70SBarry Smith 
225d3042a70SBarry Smith   PetscFunctionBegin;
226616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
227616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
228d3042a70SBarry Smith   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
229d3042a70SBarry Smith   PetscFunctionReturn(0);
230d3042a70SBarry Smith }
231d3042a70SBarry Smith 
232d5ea218eSStefano Zampini static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
233d5ea218eSStefano Zampini {
234d5ea218eSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
235d5ea218eSStefano Zampini   PetscErrorCode ierr;
236d5ea218eSStefano Zampini 
237d5ea218eSStefano Zampini   PetscFunctionBegin;
238616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240d5ea218eSStefano Zampini   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
241d5ea218eSStefano Zampini   PetscFunctionReturn(0);
242d5ea218eSStefano Zampini }
243d5ea218eSStefano Zampini 
2447dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
245ca3fa75bSLois Curfman McInnes {
246ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
2476849ba73SBarry Smith   PetscErrorCode    ierr;
248637a0070SStefano Zampini   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
2495d0c19d7SBarry Smith   const PetscInt    *irow,*icol;
250637a0070SStefano Zampini   const PetscScalar *v;
251637a0070SStefano Zampini   PetscScalar       *bv;
252ca3fa75bSLois Curfman McInnes   Mat               newmat;
2534aa3045dSJed Brown   IS                iscol_local;
25442a884f0SBarry Smith   MPI_Comm          comm_is,comm_mat;
255ca3fa75bSLois Curfman McInnes 
256ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
25742a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
25842a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
25942a884f0SBarry Smith   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
26042a884f0SBarry Smith 
2614aa3045dSJed Brown   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
262ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
2634aa3045dSJed Brown   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
264b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
265b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
2664aa3045dSJed Brown   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
267ca3fa75bSLois Curfman McInnes 
268ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2697eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2707eba5e9cSLois Curfman McInnes      original matrix! */
271ca3fa75bSLois Curfman McInnes 
272ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
2737eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
274ca3fa75bSLois Curfman McInnes 
275ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
276ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
277e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2787eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
279ca3fa75bSLois Curfman McInnes     newmat = *B;
280ca3fa75bSLois Curfman McInnes   } else {
281ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
282ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
2834aa3045dSJed Brown     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
2847adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2850298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
286ca3fa75bSLois Curfman McInnes   }
287ca3fa75bSLois Curfman McInnes 
288ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
289ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
290637a0070SStefano Zampini   ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
291637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
292637a0070SStefano Zampini   ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
2934aa3045dSJed Brown   for (i=0; i<Ncols; i++) {
294637a0070SStefano Zampini     const PetscScalar *av = v + lda*icol[i];
295ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2967eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
297ca3fa75bSLois Curfman McInnes     }
298ca3fa75bSLois Curfman McInnes   }
299637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
300637a0070SStefano Zampini   ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);
301ca3fa75bSLois Curfman McInnes 
302ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
303ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
304ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
305ca3fa75bSLois Curfman McInnes 
306ca3fa75bSLois Curfman McInnes   /* Free work space */
307ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
3085bdf786aSShri Abhyankar   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
30932bb1f2dSLisandro Dalcin   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
310ca3fa75bSLois Curfman McInnes   *B   = newmat;
311ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
312ca3fa75bSLois Curfman McInnes }
313ca3fa75bSLois Curfman McInnes 
314637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
315ff14e315SSatish Balay {
31673a71a0fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
31773a71a0fSBarry Smith   PetscErrorCode ierr;
31873a71a0fSBarry Smith 
3193a40ed3dSBarry Smith   PetscFunctionBegin;
3208c778c55SBarry Smith   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
3213a40ed3dSBarry Smith   PetscFunctionReturn(0);
322ff14e315SSatish Balay }
323ff14e315SSatish Balay 
324637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
3258572280aSBarry Smith {
3268572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
3278572280aSBarry Smith   PetscErrorCode ierr;
3288572280aSBarry Smith 
3298572280aSBarry Smith   PetscFunctionBegin;
3308572280aSBarry Smith   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
3318572280aSBarry Smith   PetscFunctionReturn(0);
3328572280aSBarry Smith }
3338572280aSBarry Smith 
3346947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
3356947451fSStefano Zampini {
3366947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
3376947451fSStefano Zampini   PetscErrorCode ierr;
3386947451fSStefano Zampini 
3396947451fSStefano Zampini   PetscFunctionBegin;
3406947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
3416947451fSStefano Zampini   PetscFunctionReturn(0);
3426947451fSStefano Zampini }
3436947451fSStefano Zampini 
344dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
3458965ea79SLois Curfman McInnes {
34639ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
347dfbe8321SBarry Smith   PetscErrorCode ierr;
34813f74950SBarry Smith   PetscInt       nstash,reallocs;
3498965ea79SLois Curfman McInnes 
3503a40ed3dSBarry Smith   PetscFunctionBegin;
351910cf402Sprj-   if (mdn->donotstash || mat->nooffprocentries) return(0);
3528965ea79SLois Curfman McInnes 
353d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
3548798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
355ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
3563a40ed3dSBarry Smith   PetscFunctionReturn(0);
3578965ea79SLois Curfman McInnes }
3588965ea79SLois Curfman McInnes 
359dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
3608965ea79SLois Curfman McInnes {
36139ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
3626849ba73SBarry Smith   PetscErrorCode ierr;
36313f74950SBarry Smith   PetscInt       i,*row,*col,flg,j,rstart,ncols;
36413f74950SBarry Smith   PetscMPIInt    n;
36587828ca2SBarry Smith   PetscScalar    *val;
3668965ea79SLois Curfman McInnes 
3673a40ed3dSBarry Smith   PetscFunctionBegin;
368910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
3698965ea79SLois Curfman McInnes     /*  wait on receives */
3707ef1d9bdSSatish Balay     while (1) {
3718798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
3727ef1d9bdSSatish Balay       if (!flg) break;
3738965ea79SLois Curfman McInnes 
3747ef1d9bdSSatish Balay       for (i=0; i<n;) {
3757ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
3762205254eSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
3772205254eSKarl Rupp           if (row[j] != rstart) break;
3782205254eSKarl Rupp         }
3797ef1d9bdSSatish Balay         if (j < n) ncols = j-i;
3807ef1d9bdSSatish Balay         else       ncols = n-i;
3817ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
3824b4eb8d3SJed Brown         ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
3837ef1d9bdSSatish Balay         i    = j;
3848965ea79SLois Curfman McInnes       }
3857ef1d9bdSSatish Balay     }
3868798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
387910cf402Sprj-   }
3888965ea79SLois Curfman McInnes 
38939ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
39039ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
3918965ea79SLois Curfman McInnes 
3926d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
39339ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
3948965ea79SLois Curfman McInnes   }
3953a40ed3dSBarry Smith   PetscFunctionReturn(0);
3968965ea79SLois Curfman McInnes }
3978965ea79SLois Curfman McInnes 
398dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
3998965ea79SLois Curfman McInnes {
400dfbe8321SBarry Smith   PetscErrorCode ierr;
40139ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
4023a40ed3dSBarry Smith 
4033a40ed3dSBarry Smith   PetscFunctionBegin;
4043a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
4053a40ed3dSBarry Smith   PetscFunctionReturn(0);
4068965ea79SLois Curfman McInnes }
4078965ea79SLois Curfman McInnes 
408637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
4098965ea79SLois Curfman McInnes {
41039ddd567SLois Curfman McInnes   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
4116849ba73SBarry Smith   PetscErrorCode    ierr;
412637a0070SStefano Zampini   PetscInt          i,len,*lrows;
413637a0070SStefano Zampini 
414637a0070SStefano Zampini   PetscFunctionBegin;
415637a0070SStefano Zampini   /* get locally owned rows */
416637a0070SStefano Zampini   ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
417637a0070SStefano Zampini   /* fix right hand side if needed */
418637a0070SStefano Zampini   if (x && b) {
41997b48c8fSBarry Smith     const PetscScalar *xx;
42097b48c8fSBarry Smith     PetscScalar       *bb;
4218965ea79SLois Curfman McInnes 
42297b48c8fSBarry Smith     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
423637a0070SStefano Zampini     ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
424637a0070SStefano Zampini     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
42597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
426637a0070SStefano Zampini     ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
42797b48c8fSBarry Smith   }
428637a0070SStefano Zampini   ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
429e2eb51b1SBarry Smith   if (diag != 0.0) {
430637a0070SStefano Zampini     Vec d;
431b9679d65SBarry Smith 
432637a0070SStefano Zampini     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
433637a0070SStefano Zampini     ierr = VecSet(d,diag);CHKERRQ(ierr);
434637a0070SStefano Zampini     ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
435637a0070SStefano Zampini     ierr = VecDestroy(&d);CHKERRQ(ierr);
436b9679d65SBarry Smith   }
437606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4383a40ed3dSBarry Smith   PetscFunctionReturn(0);
4398965ea79SLois Curfman McInnes }
4408965ea79SLois Curfman McInnes 
441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
442cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
443cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
444cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
445cc2e6a90SBarry Smith 
446dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4478965ea79SLois Curfman McInnes {
44839ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
449dfbe8321SBarry Smith   PetscErrorCode    ierr;
450637a0070SStefano Zampini   const PetscScalar *ax;
451637a0070SStefano Zampini   PetscScalar       *ay;
452c456f294SBarry Smith 
4533a40ed3dSBarry Smith   PetscFunctionBegin;
454637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
455637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
456637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
457637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
458637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
459637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
460637a0070SStefano Zampini   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4613a40ed3dSBarry Smith   PetscFunctionReturn(0);
4628965ea79SLois Curfman McInnes }
4638965ea79SLois Curfman McInnes 
464dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4658965ea79SLois Curfman McInnes {
46639ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
467dfbe8321SBarry Smith   PetscErrorCode    ierr;
468637a0070SStefano Zampini   const PetscScalar *ax;
469637a0070SStefano Zampini   PetscScalar       *ay;
470c456f294SBarry Smith 
4713a40ed3dSBarry Smith   PetscFunctionBegin;
472637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
473637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
474637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
475637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
476637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
477637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
478637a0070SStefano Zampini   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
4793a40ed3dSBarry Smith   PetscFunctionReturn(0);
4808965ea79SLois Curfman McInnes }
4818965ea79SLois Curfman McInnes 
482dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
483096963f5SLois Curfman McInnes {
484096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
485dfbe8321SBarry Smith   PetscErrorCode    ierr;
486637a0070SStefano Zampini   const PetscScalar *ax;
487637a0070SStefano Zampini   PetscScalar       *ay;
488096963f5SLois Curfman McInnes 
4893a40ed3dSBarry Smith   PetscFunctionBegin;
490637a0070SStefano Zampini   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
491637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
492637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
493637a0070SStefano Zampini   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
494637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
495637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
496637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
497637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
4983a40ed3dSBarry Smith   PetscFunctionReturn(0);
499096963f5SLois Curfman McInnes }
500096963f5SLois Curfman McInnes 
501dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
502096963f5SLois Curfman McInnes {
503096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
504dfbe8321SBarry Smith   PetscErrorCode    ierr;
505637a0070SStefano Zampini   const PetscScalar *ax;
506637a0070SStefano Zampini   PetscScalar       *ay;
507096963f5SLois Curfman McInnes 
5083a40ed3dSBarry Smith   PetscFunctionBegin;
5093501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
510637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
511637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
512637a0070SStefano Zampini   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
513637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
514637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
515637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
516637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
5173a40ed3dSBarry Smith   PetscFunctionReturn(0);
518096963f5SLois Curfman McInnes }
519096963f5SLois Curfman McInnes 
520dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
5218965ea79SLois Curfman McInnes {
52239ddd567SLois Curfman McInnes   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
523dfbe8321SBarry Smith   PetscErrorCode    ierr;
524637a0070SStefano Zampini   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
52587828ca2SBarry Smith   PetscScalar       *x,zero = 0.0;
526637a0070SStefano Zampini   const PetscScalar *av;
527ed3cc1f0SBarry Smith 
5283a40ed3dSBarry Smith   PetscFunctionBegin;
5292dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
5301ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
531096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
532e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
533d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
534d0f46423SBarry Smith   radd = A->rmap->rstart*m;
535637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
536637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
53744cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
538637a0070SStefano Zampini     x[i] = av[radd + i*lda + i];
539096963f5SLois Curfman McInnes   }
540637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
5411ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5423a40ed3dSBarry Smith   PetscFunctionReturn(0);
5438965ea79SLois Curfman McInnes }
5448965ea79SLois Curfman McInnes 
545dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5468965ea79SLois Curfman McInnes {
5473501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
548dfbe8321SBarry Smith   PetscErrorCode ierr;
549ed3cc1f0SBarry Smith 
5503a40ed3dSBarry Smith   PetscFunctionBegin;
551aa482453SBarry Smith #if defined(PETSC_USE_LOG)
552d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
5538965ea79SLois Curfman McInnes #endif
5548798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
555616b8fbbSStefano Zampini   if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
556616b8fbbSStefano Zampini   if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
5576bf464f9SBarry Smith   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
5586bf464f9SBarry Smith   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
559637a0070SStefano Zampini   ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
5606947451fSStefano Zampini   ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
5615ea7661aSPierre Jolivet   ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);
56201b82886SBarry Smith 
563bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
564dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
5658baccfbdSHong Zhang 
56649a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
567ad16ce7aSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
5688baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
5698572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
5708572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
5718572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
5726947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
5736947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
574d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
575d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
576d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
5778baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5788baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
5798baccfbdSHong Zhang #endif
580*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
581*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
582*d24d4204SJose E. Roman #endif
583bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
5844222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5854222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
5866718818eSStefano Zampini #if defined (PETSC_HAVE_CUDA)
5876718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
5886718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
5896718818eSStefano Zampini #endif
59086aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
59186aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
5926947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
5936947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
5946947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
5956947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
5966947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
5976947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
5985ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
5995ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
6003a40ed3dSBarry Smith   PetscFunctionReturn(0);
6018965ea79SLois Curfman McInnes }
60239ddd567SLois Curfman McInnes 
60352c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
60452c5f739Sprj- 
6059804daf3SBarry Smith #include <petscdraw.h>
6066849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
6078965ea79SLois Curfman McInnes {
60839ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
609dfbe8321SBarry Smith   PetscErrorCode    ierr;
610616b8fbbSStefano Zampini   PetscMPIInt       rank;
61119fd82e9SBarry Smith   PetscViewerType   vtype;
612ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
613b0a32e0cSBarry Smith   PetscViewer       sviewer;
614f3ef73ceSBarry Smith   PetscViewerFormat format;
6158965ea79SLois Curfman McInnes 
6163a40ed3dSBarry Smith   PetscFunctionBegin;
617616b8fbbSStefano Zampini   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
618251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
619251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
62032077d6dSBarry Smith   if (iascii) {
621b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
622b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6244e220ebcSLois Curfman McInnes       MatInfo info;
625888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
6261575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
6277b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
628b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6291575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
630637a0070SStefano Zampini       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
6313a40ed3dSBarry Smith       PetscFunctionReturn(0);
632fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6333a40ed3dSBarry Smith       PetscFunctionReturn(0);
6348965ea79SLois Curfman McInnes     }
635f1af5d2fSBarry Smith   } else if (isdraw) {
636b0a32e0cSBarry Smith     PetscDraw draw;
637ace3abfcSBarry Smith     PetscBool isnull;
638f1af5d2fSBarry Smith 
639b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
640b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
641f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
642f1af5d2fSBarry Smith   }
64377ed5343SBarry Smith 
6447da1fb6eSBarry Smith   {
6458965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6468965ea79SLois Curfman McInnes     Mat         A;
647d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
648ba8c8a56SBarry Smith     PetscInt    *cols;
649ba8c8a56SBarry Smith     PetscScalar *vals;
6508965ea79SLois Curfman McInnes 
651ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
6528965ea79SLois Curfman McInnes     if (!rank) {
653f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
6543a40ed3dSBarry Smith     } else {
655f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
6568965ea79SLois Curfman McInnes     }
6577adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
658878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
6590298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
6603bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
6618965ea79SLois Curfman McInnes 
66239ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
66339ddd567SLois Curfman McInnes        but it's quick for now */
66451022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6652205254eSKarl Rupp 
6662205254eSKarl Rupp     row = mat->rmap->rstart;
6672205254eSKarl Rupp     m   = mdn->A->rmap->n;
6688965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
669ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
670ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
671ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
67239ddd567SLois Curfman McInnes       row++;
6738965ea79SLois Curfman McInnes     }
6748965ea79SLois Curfman McInnes 
6756d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6766d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6773f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
678b9b97703SBarry Smith     if (!rank) {
6791a9d3c3cSBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
6807da1fb6eSBarry Smith       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
6818965ea79SLois Curfman McInnes     }
6823f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
683b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6846bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
6858965ea79SLois Curfman McInnes   }
6863a40ed3dSBarry Smith   PetscFunctionReturn(0);
6878965ea79SLois Curfman McInnes }
6888965ea79SLois Curfman McInnes 
689dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
6908965ea79SLois Curfman McInnes {
691dfbe8321SBarry Smith   PetscErrorCode ierr;
692ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw,issocket;
6938965ea79SLois Curfman McInnes 
694433994e6SBarry Smith   PetscFunctionBegin;
695251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
696251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
697251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
698251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
6990f5bd95cSBarry Smith 
70032077d6dSBarry Smith   if (iascii || issocket || isdraw) {
701f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
7020f5bd95cSBarry Smith   } else if (isbinary) {
7038491ab44SLisandro Dalcin     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
70411aeaf0aSBarry Smith   }
7053a40ed3dSBarry Smith   PetscFunctionReturn(0);
7068965ea79SLois Curfman McInnes }
7078965ea79SLois Curfman McInnes 
708dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
7098965ea79SLois Curfman McInnes {
7103501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
7113501a2bdSLois Curfman McInnes   Mat            mdn  = mat->A;
712dfbe8321SBarry Smith   PetscErrorCode ierr;
7133966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
7148965ea79SLois Curfman McInnes 
7153a40ed3dSBarry Smith   PetscFunctionBegin;
7164e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7172205254eSKarl Rupp 
7184e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
7192205254eSKarl Rupp 
7204e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
7214e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
7228965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7234e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7244e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7254e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7264e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7274e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7288965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
7293966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7302205254eSKarl Rupp 
7314e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7324e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7334e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7344e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7354e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7368965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
7373966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7382205254eSKarl Rupp 
7394e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7404e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7414e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7424e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7434e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7448965ea79SLois Curfman McInnes   }
7454e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7464e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7474e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7483a40ed3dSBarry Smith   PetscFunctionReturn(0);
7498965ea79SLois Curfman McInnes }
7508965ea79SLois Curfman McInnes 
751ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
7528965ea79SLois Curfman McInnes {
75339ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
754dfbe8321SBarry Smith   PetscErrorCode ierr;
7558965ea79SLois Curfman McInnes 
7563a40ed3dSBarry Smith   PetscFunctionBegin;
75712c028f9SKris Buschelman   switch (op) {
758512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
75912c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
76012c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
76143674050SBarry Smith     MatCheckPreallocated(A,1);
7624e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
76312c028f9SKris Buschelman     break;
76412c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
76543674050SBarry Smith     MatCheckPreallocated(A,1);
7664e0d8c25SBarry Smith     a->roworiented = flg;
7674e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
76812c028f9SKris Buschelman     break;
7694e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
77013fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
77112c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
772071fcb05SBarry Smith   case MAT_SORTED_FULL:
773290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
77412c028f9SKris Buschelman     break;
77512c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
7764e0d8c25SBarry Smith     a->donotstash = flg;
77712c028f9SKris Buschelman     break;
77877e54ba9SKris Buschelman   case MAT_SYMMETRIC:
77977e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7809a4540c5SBarry Smith   case MAT_HERMITIAN:
7819a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
782600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
7835d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
784290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
78577e54ba9SKris Buschelman     break;
78612c028f9SKris Buschelman   default:
787e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
7883a40ed3dSBarry Smith   }
7893a40ed3dSBarry Smith   PetscFunctionReturn(0);
7908965ea79SLois Curfman McInnes }
7918965ea79SLois Curfman McInnes 
792dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
7935b2fa520SLois Curfman McInnes {
7945b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
795637a0070SStefano Zampini   const PetscScalar *l;
796637a0070SStefano Zampini   PetscScalar       x,*v,*vv,*r;
797dfbe8321SBarry Smith   PetscErrorCode    ierr;
798637a0070SStefano Zampini   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
7995b2fa520SLois Curfman McInnes 
8005b2fa520SLois Curfman McInnes   PetscFunctionBegin;
801637a0070SStefano Zampini   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
802637a0070SStefano Zampini   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
80372d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
8045b2fa520SLois Curfman McInnes   if (ll) {
80572d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
806637a0070SStefano Zampini     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
807bca11509SBarry Smith     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
8085b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
8095b2fa520SLois Curfman McInnes       x = l[i];
810637a0070SStefano Zampini       v = vv + i;
811637a0070SStefano Zampini       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
8125b2fa520SLois Curfman McInnes     }
813bca11509SBarry Smith     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
814637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
8155b2fa520SLois Curfman McInnes   }
8165b2fa520SLois Curfman McInnes   if (rr) {
817637a0070SStefano Zampini     const PetscScalar *ar;
818637a0070SStefano Zampini 
819175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
820e32f2f54SBarry Smith     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
821637a0070SStefano Zampini     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
822637a0070SStefano Zampini     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
823637a0070SStefano Zampini     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
824637a0070SStefano Zampini     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
825637a0070SStefano Zampini     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
8265b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
8275b2fa520SLois Curfman McInnes       x = r[i];
828637a0070SStefano Zampini       v = vv + i*lda;
8292205254eSKarl Rupp       for (j=0; j<m; j++) (*v++) *= x;
8305b2fa520SLois Curfman McInnes     }
831637a0070SStefano Zampini     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
832637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
8335b2fa520SLois Curfman McInnes   }
834637a0070SStefano Zampini   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
8355b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8365b2fa520SLois Curfman McInnes }
8375b2fa520SLois Curfman McInnes 
838dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
839096963f5SLois Curfman McInnes {
8403501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
841dfbe8321SBarry Smith   PetscErrorCode    ierr;
84213f74950SBarry Smith   PetscInt          i,j;
843616b8fbbSStefano Zampini   PetscMPIInt       size;
844329f5518SBarry Smith   PetscReal         sum = 0.0;
845637a0070SStefano Zampini   const PetscScalar *av,*v;
8463501a2bdSLois Curfman McInnes 
8473a40ed3dSBarry Smith   PetscFunctionBegin;
848637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
849637a0070SStefano Zampini   v    = av;
850616b8fbbSStefano Zampini   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
851616b8fbbSStefano Zampini   if (size == 1) {
852064f8208SBarry Smith     ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
8533501a2bdSLois Curfman McInnes   } else {
8543501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
855d0f46423SBarry Smith       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
856329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
8573501a2bdSLois Curfman McInnes       }
858b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
8598f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
860dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
8613a40ed3dSBarry Smith     } else if (type == NORM_1) {
862329f5518SBarry Smith       PetscReal *tmp,*tmp2;
863580bdb30SBarry Smith       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
864064f8208SBarry Smith       *nrm = 0.0;
865637a0070SStefano Zampini       v    = av;
866d0f46423SBarry Smith       for (j=0; j<mdn->A->cmap->n; j++) {
867d0f46423SBarry Smith         for (i=0; i<mdn->A->rmap->n; i++) {
86867e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
8693501a2bdSLois Curfman McInnes         }
8703501a2bdSLois Curfman McInnes       }
871b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
872d0f46423SBarry Smith       for (j=0; j<A->cmap->N; j++) {
873064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8743501a2bdSLois Curfman McInnes       }
8758627564fSBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
876d0f46423SBarry Smith       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
8773a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
878329f5518SBarry Smith       PetscReal ntemp;
8793501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
880b2566f29SBarry Smith       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
881ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
8823501a2bdSLois Curfman McInnes   }
883637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
8843a40ed3dSBarry Smith   PetscFunctionReturn(0);
8853501a2bdSLois Curfman McInnes }
8863501a2bdSLois Curfman McInnes 
887fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
8883501a2bdSLois Curfman McInnes {
8893501a2bdSLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
8903501a2bdSLois Curfman McInnes   Mat            B;
891d0f46423SBarry Smith   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
8926849ba73SBarry Smith   PetscErrorCode ierr;
893637a0070SStefano Zampini   PetscInt       j,i,lda;
89487828ca2SBarry Smith   PetscScalar    *v;
8953501a2bdSLois Curfman McInnes 
8963a40ed3dSBarry Smith   PetscFunctionBegin;
897cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
898ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
899d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
9007adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
9010298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
902637a0070SStefano Zampini   } else B = *matout;
9033501a2bdSLois Curfman McInnes 
904637a0070SStefano Zampini   m    = a->A->rmap->n; n = a->A->cmap->n;
905637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
906637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
907785e854fSJed Brown   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
9083501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
9091acff37aSSatish Balay   for (j=0; j<n; j++) {
9103501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
911637a0070SStefano Zampini     v   += lda;
9123501a2bdSLois Curfman McInnes   }
913637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
914606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
9156d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9166d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
917cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9183501a2bdSLois Curfman McInnes     *matout = B;
9193501a2bdSLois Curfman McInnes   } else {
92028be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
9213501a2bdSLois Curfman McInnes   }
9223a40ed3dSBarry Smith   PetscFunctionReturn(0);
923096963f5SLois Curfman McInnes }
924096963f5SLois Curfman McInnes 
9256849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
92652c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
9278965ea79SLois Curfman McInnes 
9284994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A)
929273d9f13SBarry Smith {
930dfbe8321SBarry Smith   PetscErrorCode ierr;
931273d9f13SBarry Smith 
932273d9f13SBarry Smith   PetscFunctionBegin;
93318992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
93418992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
93518992e5dSStefano Zampini   if (!A->preallocated) {
936273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
93718992e5dSStefano Zampini   }
938273d9f13SBarry Smith   PetscFunctionReturn(0);
939273d9f13SBarry Smith }
940273d9f13SBarry Smith 
941488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
942488007eeSBarry Smith {
943488007eeSBarry Smith   PetscErrorCode ierr;
944488007eeSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
945488007eeSBarry Smith 
946488007eeSBarry Smith   PetscFunctionBegin;
947488007eeSBarry Smith   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
948488007eeSBarry Smith   PetscFunctionReturn(0);
949488007eeSBarry Smith }
950488007eeSBarry Smith 
9517087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat)
952ba337c44SJed Brown {
953ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
954ba337c44SJed Brown   PetscErrorCode ierr;
955ba337c44SJed Brown 
956ba337c44SJed Brown   PetscFunctionBegin;
957ba337c44SJed Brown   ierr = MatConjugate(a->A);CHKERRQ(ierr);
958ba337c44SJed Brown   PetscFunctionReturn(0);
959ba337c44SJed Brown }
960ba337c44SJed Brown 
961ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A)
962ba337c44SJed Brown {
963ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
964ba337c44SJed Brown   PetscErrorCode ierr;
965ba337c44SJed Brown 
966ba337c44SJed Brown   PetscFunctionBegin;
967ba337c44SJed Brown   ierr = MatRealPart(a->A);CHKERRQ(ierr);
968ba337c44SJed Brown   PetscFunctionReturn(0);
969ba337c44SJed Brown }
970ba337c44SJed Brown 
971ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
972ba337c44SJed Brown {
973ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
974ba337c44SJed Brown   PetscErrorCode ierr;
975ba337c44SJed Brown 
976ba337c44SJed Brown   PetscFunctionBegin;
977ba337c44SJed Brown   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
978ba337c44SJed Brown   PetscFunctionReturn(0);
979ba337c44SJed Brown }
980ba337c44SJed Brown 
98149a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
98249a6ff4bSBarry Smith {
98349a6ff4bSBarry Smith   PetscErrorCode ierr;
984637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
98549a6ff4bSBarry Smith 
98649a6ff4bSBarry Smith   PetscFunctionBegin;
987637a0070SStefano Zampini   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
988637a0070SStefano Zampini   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
989637a0070SStefano Zampini   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
99049a6ff4bSBarry Smith   PetscFunctionReturn(0);
99149a6ff4bSBarry Smith }
99249a6ff4bSBarry Smith 
99352c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
99452c5f739Sprj- 
9950716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
9960716a85fSBarry Smith {
9970716a85fSBarry Smith   PetscErrorCode ierr;
9980716a85fSBarry Smith   PetscInt       i,n;
9990716a85fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
10000716a85fSBarry Smith   PetscReal      *work;
10010716a85fSBarry Smith 
10020716a85fSBarry Smith   PetscFunctionBegin;
10030298fd71SBarry Smith   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1004785e854fSJed Brown   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
10050716a85fSBarry Smith   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
10060716a85fSBarry Smith   if (type == NORM_2) {
10070716a85fSBarry Smith     for (i=0; i<n; i++) work[i] *= work[i];
10080716a85fSBarry Smith   }
10090716a85fSBarry Smith   if (type == NORM_INFINITY) {
1010b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
10110716a85fSBarry Smith   } else {
1012b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
10130716a85fSBarry Smith   }
10140716a85fSBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
10150716a85fSBarry Smith   if (type == NORM_2) {
10168f1a2a5eSBarry Smith     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
10170716a85fSBarry Smith   }
10180716a85fSBarry Smith   PetscFunctionReturn(0);
10190716a85fSBarry Smith }
10200716a85fSBarry Smith 
1021637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
10226947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10236947451fSStefano Zampini {
10246947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10256947451fSStefano Zampini   PetscErrorCode ierr;
10266947451fSStefano Zampini   PetscInt       lda;
10276947451fSStefano Zampini 
10286947451fSStefano Zampini   PetscFunctionBegin;
1029616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1030616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
10316947451fSStefano Zampini   if (!a->cvec) {
10326947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1033616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
10346947451fSStefano Zampini   }
10356947451fSStefano Zampini   a->vecinuse = col + 1;
10366947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
10376947451fSStefano Zampini   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
10386947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
10396947451fSStefano Zampini   *v   = a->cvec;
10406947451fSStefano Zampini   PetscFunctionReturn(0);
10416947451fSStefano Zampini }
10426947451fSStefano Zampini 
10436947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10446947451fSStefano Zampini {
10456947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10466947451fSStefano Zampini   PetscErrorCode ierr;
10476947451fSStefano Zampini 
10486947451fSStefano Zampini   PetscFunctionBegin;
10495ea7661aSPierre Jolivet   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
10506947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
10516947451fSStefano Zampini   a->vecinuse = 0;
10526947451fSStefano Zampini   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
10536947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
10546947451fSStefano Zampini   *v   = NULL;
10556947451fSStefano Zampini   PetscFunctionReturn(0);
10566947451fSStefano Zampini }
10576947451fSStefano Zampini 
10586947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10596947451fSStefano Zampini {
10606947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10616947451fSStefano Zampini   PetscInt       lda;
10626947451fSStefano Zampini   PetscErrorCode ierr;
10636947451fSStefano Zampini 
10646947451fSStefano Zampini   PetscFunctionBegin;
1065616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1066616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
10676947451fSStefano Zampini   if (!a->cvec) {
10686947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1069616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
10706947451fSStefano Zampini   }
10716947451fSStefano Zampini   a->vecinuse = col + 1;
10726947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
10736947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
10746947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
10756947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
10766947451fSStefano Zampini   *v   = a->cvec;
10776947451fSStefano Zampini   PetscFunctionReturn(0);
10786947451fSStefano Zampini }
10796947451fSStefano Zampini 
10806947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10816947451fSStefano Zampini {
10826947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10836947451fSStefano Zampini   PetscErrorCode ierr;
10846947451fSStefano Zampini 
10856947451fSStefano Zampini   PetscFunctionBegin;
1086616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1087616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
10886947451fSStefano Zampini   a->vecinuse = 0;
10896947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
10906947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
10916947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
10926947451fSStefano Zampini   *v   = NULL;
10936947451fSStefano Zampini   PetscFunctionReturn(0);
10946947451fSStefano Zampini }
10956947451fSStefano Zampini 
10966947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
10976947451fSStefano Zampini {
10986947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
10996947451fSStefano Zampini   PetscErrorCode ierr;
11006947451fSStefano Zampini   PetscInt       lda;
11016947451fSStefano Zampini 
11026947451fSStefano Zampini   PetscFunctionBegin;
1103616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1104616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
11056947451fSStefano Zampini   if (!a->cvec) {
11066947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1107616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
11086947451fSStefano Zampini   }
11096947451fSStefano Zampini   a->vecinuse = col + 1;
11106947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
11116947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
11126947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
11136947451fSStefano Zampini   *v   = a->cvec;
11146947451fSStefano Zampini   PetscFunctionReturn(0);
11156947451fSStefano Zampini }
11166947451fSStefano Zampini 
11176947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
11186947451fSStefano Zampini {
11196947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
11206947451fSStefano Zampini   PetscErrorCode ierr;
11216947451fSStefano Zampini 
11226947451fSStefano Zampini   PetscFunctionBegin;
1123616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1124616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
11256947451fSStefano Zampini   a->vecinuse = 0;
11266947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
11276947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
11286947451fSStefano Zampini   *v   = NULL;
11296947451fSStefano Zampini   PetscFunctionReturn(0);
11306947451fSStefano Zampini }
11316947451fSStefano Zampini 
1132637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1133637a0070SStefano Zampini {
1134637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1135637a0070SStefano Zampini   PetscErrorCode ierr;
1136637a0070SStefano Zampini 
1137637a0070SStefano Zampini   PetscFunctionBegin;
1138616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1139616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1140637a0070SStefano Zampini   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1141637a0070SStefano Zampini   PetscFunctionReturn(0);
1142637a0070SStefano Zampini }
1143637a0070SStefano Zampini 
1144637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1145637a0070SStefano Zampini {
1146637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1147637a0070SStefano Zampini   PetscErrorCode ierr;
1148637a0070SStefano Zampini 
1149637a0070SStefano Zampini   PetscFunctionBegin;
1150616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1151616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1152637a0070SStefano Zampini   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1153637a0070SStefano Zampini   PetscFunctionReturn(0);
1154637a0070SStefano Zampini }
1155637a0070SStefano Zampini 
1156d5ea218eSStefano Zampini static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1157d5ea218eSStefano Zampini {
1158d5ea218eSStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1159d5ea218eSStefano Zampini   PetscErrorCode ierr;
1160d5ea218eSStefano Zampini 
1161d5ea218eSStefano Zampini   PetscFunctionBegin;
1162616b8fbbSStefano Zampini   if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1163616b8fbbSStefano Zampini   if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1164d5ea218eSStefano Zampini   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1165d5ea218eSStefano Zampini   PetscFunctionReturn(0);
1166d5ea218eSStefano Zampini }
1167d5ea218eSStefano Zampini 
1168637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1169637a0070SStefano Zampini {
1170637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1171637a0070SStefano Zampini   PetscErrorCode ierr;
1172637a0070SStefano Zampini 
1173637a0070SStefano Zampini   PetscFunctionBegin;
1174637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1175637a0070SStefano Zampini   PetscFunctionReturn(0);
1176637a0070SStefano Zampini }
1177637a0070SStefano Zampini 
1178637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1179637a0070SStefano Zampini {
1180637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1181637a0070SStefano Zampini   PetscErrorCode ierr;
1182637a0070SStefano Zampini 
1183637a0070SStefano Zampini   PetscFunctionBegin;
1184637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1185637a0070SStefano Zampini   PetscFunctionReturn(0);
1186637a0070SStefano Zampini }
1187637a0070SStefano Zampini 
1188637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1189637a0070SStefano Zampini {
1190637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1191637a0070SStefano Zampini   PetscErrorCode ierr;
1192637a0070SStefano Zampini 
1193637a0070SStefano Zampini   PetscFunctionBegin;
1194637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1195637a0070SStefano Zampini   PetscFunctionReturn(0);
1196637a0070SStefano Zampini }
1197637a0070SStefano Zampini 
1198637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1199637a0070SStefano Zampini {
1200637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1201637a0070SStefano Zampini   PetscErrorCode ierr;
1202637a0070SStefano Zampini 
1203637a0070SStefano Zampini   PetscFunctionBegin;
1204637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1205637a0070SStefano Zampini   PetscFunctionReturn(0);
1206637a0070SStefano Zampini }
1207637a0070SStefano Zampini 
1208637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1209637a0070SStefano Zampini {
1210637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211637a0070SStefano Zampini   PetscErrorCode ierr;
1212637a0070SStefano Zampini 
1213637a0070SStefano Zampini   PetscFunctionBegin;
1214637a0070SStefano Zampini   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1215637a0070SStefano Zampini   PetscFunctionReturn(0);
1216637a0070SStefano Zampini }
1217637a0070SStefano Zampini 
1218637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1219637a0070SStefano Zampini {
1220637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1221637a0070SStefano Zampini   PetscErrorCode ierr;
1222637a0070SStefano Zampini 
1223637a0070SStefano Zampini   PetscFunctionBegin;
1224637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1225637a0070SStefano Zampini   PetscFunctionReturn(0);
1226637a0070SStefano Zampini }
1227637a0070SStefano Zampini 
12286947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
12296947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
12306947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
12316947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
12326947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
12336947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
12345ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
12356947451fSStefano Zampini 
1236637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1237637a0070SStefano Zampini {
1238637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1239637a0070SStefano Zampini   PetscErrorCode ierr;
1240637a0070SStefano Zampini 
1241637a0070SStefano Zampini   PetscFunctionBegin;
1242616b8fbbSStefano Zampini   if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1243616b8fbbSStefano Zampini   if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1244637a0070SStefano Zampini   if (d->A) {
1245637a0070SStefano Zampini     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1246637a0070SStefano Zampini   }
1247637a0070SStefano Zampini   mat->boundtocpu = bind;
12486947451fSStefano Zampini   if (!bind) {
12496947451fSStefano Zampini     PetscBool iscuda;
12506947451fSStefano Zampini 
12516947451fSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
12526947451fSStefano Zampini     if (!iscuda) {
12536947451fSStefano Zampini       ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
1254616b8fbbSStefano Zampini     }
1255616b8fbbSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1256616b8fbbSStefano Zampini     if (!iscuda) {
12575ea7661aSPierre Jolivet       ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
12586947451fSStefano Zampini     }
12596947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
12606947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
12616947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
12626947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
12636947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
12646947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
12656947451fSStefano Zampini   } else {
12666947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
12676947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
12686947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
12696947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
12706947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
12716947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1272616b8fbbSStefano Zampini   }
1273616b8fbbSStefano Zampini   if (d->cmat) {
1274616b8fbbSStefano Zampini     ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
12756947451fSStefano Zampini   }
1276637a0070SStefano Zampini   PetscFunctionReturn(0);
1277637a0070SStefano Zampini }
1278637a0070SStefano Zampini 
1279637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1280637a0070SStefano Zampini {
1281637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1282637a0070SStefano Zampini   PetscErrorCode ierr;
1283637a0070SStefano Zampini   PetscBool      iscuda;
1284637a0070SStefano Zampini 
1285637a0070SStefano Zampini   PetscFunctionBegin;
1286d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1287637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1288637a0070SStefano Zampini   if (!iscuda) PetscFunctionReturn(0);
1289637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1290637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1291637a0070SStefano Zampini   if (!d->A) {
1292637a0070SStefano Zampini     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1293637a0070SStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1294637a0070SStefano Zampini     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1295637a0070SStefano Zampini   }
1296637a0070SStefano Zampini   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1297637a0070SStefano Zampini   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1298637a0070SStefano Zampini   A->preallocated = PETSC_TRUE;
1299637a0070SStefano Zampini   PetscFunctionReturn(0);
1300637a0070SStefano Zampini }
1301637a0070SStefano Zampini #endif
1302637a0070SStefano Zampini 
130373a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
130473a71a0fSBarry Smith {
130573a71a0fSBarry Smith   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
130673a71a0fSBarry Smith   PetscErrorCode ierr;
130773a71a0fSBarry Smith 
130873a71a0fSBarry Smith   PetscFunctionBegin;
1309637a0070SStefano Zampini   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
131073a71a0fSBarry Smith   PetscFunctionReturn(0);
131173a71a0fSBarry Smith }
131273a71a0fSBarry Smith 
13133b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
13143b49f96aSBarry Smith {
13153b49f96aSBarry Smith   PetscFunctionBegin;
13163b49f96aSBarry Smith   *missing = PETSC_FALSE;
13173b49f96aSBarry Smith   PetscFunctionReturn(0);
13183b49f96aSBarry Smith }
13193b49f96aSBarry Smith 
13204222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1321cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
13226718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
13236718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
13246718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
13256718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1326cc48ffa7SToby Isaac 
13278965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
132809dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
132909dc0095SBarry Smith                                         MatGetRow_MPIDense,
133009dc0095SBarry Smith                                         MatRestoreRow_MPIDense,
133109dc0095SBarry Smith                                         MatMult_MPIDense,
133297304618SKris Buschelman                                 /*  4*/ MatMultAdd_MPIDense,
13337c922b88SBarry Smith                                         MatMultTranspose_MPIDense,
13347c922b88SBarry Smith                                         MatMultTransposeAdd_MPIDense,
13358965ea79SLois Curfman McInnes                                         0,
133609dc0095SBarry Smith                                         0,
133709dc0095SBarry Smith                                         0,
133897304618SKris Buschelman                                 /* 10*/ 0,
133909dc0095SBarry Smith                                         0,
134009dc0095SBarry Smith                                         0,
134109dc0095SBarry Smith                                         0,
134209dc0095SBarry Smith                                         MatTranspose_MPIDense,
134397304618SKris Buschelman                                 /* 15*/ MatGetInfo_MPIDense,
13446e4ee0c6SHong Zhang                                         MatEqual_MPIDense,
134509dc0095SBarry Smith                                         MatGetDiagonal_MPIDense,
13465b2fa520SLois Curfman McInnes                                         MatDiagonalScale_MPIDense,
134709dc0095SBarry Smith                                         MatNorm_MPIDense,
134897304618SKris Buschelman                                 /* 20*/ MatAssemblyBegin_MPIDense,
134909dc0095SBarry Smith                                         MatAssemblyEnd_MPIDense,
135009dc0095SBarry Smith                                         MatSetOption_MPIDense,
135109dc0095SBarry Smith                                         MatZeroEntries_MPIDense,
1352d519adbfSMatthew Knepley                                 /* 24*/ MatZeroRows_MPIDense,
1353919b68f7SBarry Smith                                         0,
135401b82886SBarry Smith                                         0,
135501b82886SBarry Smith                                         0,
135601b82886SBarry Smith                                         0,
13574994cf47SJed Brown                                 /* 29*/ MatSetUp_MPIDense,
1358273d9f13SBarry Smith                                         0,
135909dc0095SBarry Smith                                         0,
1360c56a70eeSBarry Smith                                         MatGetDiagonalBlock_MPIDense,
13618c778c55SBarry Smith                                         0,
1362d519adbfSMatthew Knepley                                 /* 34*/ MatDuplicate_MPIDense,
136309dc0095SBarry Smith                                         0,
136409dc0095SBarry Smith                                         0,
136509dc0095SBarry Smith                                         0,
136609dc0095SBarry Smith                                         0,
1367d519adbfSMatthew Knepley                                 /* 39*/ MatAXPY_MPIDense,
13687dae84e0SHong Zhang                                         MatCreateSubMatrices_MPIDense,
136909dc0095SBarry Smith                                         0,
137009dc0095SBarry Smith                                         MatGetValues_MPIDense,
137109dc0095SBarry Smith                                         0,
1372d519adbfSMatthew Knepley                                 /* 44*/ 0,
137309dc0095SBarry Smith                                         MatScale_MPIDense,
13747d68702bSBarry Smith                                         MatShift_Basic,
137509dc0095SBarry Smith                                         0,
137609dc0095SBarry Smith                                         0,
137773a71a0fSBarry Smith                                 /* 49*/ MatSetRandom_MPIDense,
137809dc0095SBarry Smith                                         0,
137909dc0095SBarry Smith                                         0,
138009dc0095SBarry Smith                                         0,
138109dc0095SBarry Smith                                         0,
1382d519adbfSMatthew Knepley                                 /* 54*/ 0,
138309dc0095SBarry Smith                                         0,
138409dc0095SBarry Smith                                         0,
138509dc0095SBarry Smith                                         0,
138609dc0095SBarry Smith                                         0,
13877dae84e0SHong Zhang                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1388b9b97703SBarry Smith                                         MatDestroy_MPIDense,
1389b9b97703SBarry Smith                                         MatView_MPIDense,
1390357abbc8SBarry Smith                                         0,
139197304618SKris Buschelman                                         0,
1392d519adbfSMatthew Knepley                                 /* 64*/ 0,
139397304618SKris Buschelman                                         0,
139497304618SKris Buschelman                                         0,
139597304618SKris Buschelman                                         0,
139697304618SKris Buschelman                                         0,
1397d519adbfSMatthew Knepley                                 /* 69*/ 0,
139897304618SKris Buschelman                                         0,
139997304618SKris Buschelman                                         0,
140097304618SKris Buschelman                                         0,
140197304618SKris Buschelman                                         0,
1402d519adbfSMatthew Knepley                                 /* 74*/ 0,
140397304618SKris Buschelman                                         0,
140497304618SKris Buschelman                                         0,
140597304618SKris Buschelman                                         0,
140697304618SKris Buschelman                                         0,
1407d519adbfSMatthew Knepley                                 /* 79*/ 0,
140897304618SKris Buschelman                                         0,
140997304618SKris Buschelman                                         0,
141097304618SKris Buschelman                                         0,
14115bba2384SShri Abhyankar                                 /* 83*/ MatLoad_MPIDense,
1412865e5f61SKris Buschelman                                         0,
1413865e5f61SKris Buschelman                                         0,
1414865e5f61SKris Buschelman                                         0,
1415865e5f61SKris Buschelman                                         0,
1416865e5f61SKris Buschelman                                         0,
14174222ddf1SHong Zhang                                 /* 89*/ 0,
14184222ddf1SHong Zhang                                         0,
14196718818eSStefano Zampini                                         0,
14202fbe02b9SBarry Smith                                         0,
1421ba337c44SJed Brown                                         0,
1422d519adbfSMatthew Knepley                                 /* 94*/ 0,
14234222ddf1SHong Zhang                                         0,
14246718818eSStefano Zampini                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1425cc48ffa7SToby Isaac                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1426ba337c44SJed Brown                                         0,
14274222ddf1SHong Zhang                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1428ba337c44SJed Brown                                         0,
1429ba337c44SJed Brown                                         0,
1430ba337c44SJed Brown                                         MatConjugate_MPIDense,
1431ba337c44SJed Brown                                         0,
1432ba337c44SJed Brown                                 /*104*/ 0,
1433ba337c44SJed Brown                                         MatRealPart_MPIDense,
1434ba337c44SJed Brown                                         MatImaginaryPart_MPIDense,
143586d161a7SShri Abhyankar                                         0,
143686d161a7SShri Abhyankar                                         0,
143786d161a7SShri Abhyankar                                 /*109*/ 0,
143886d161a7SShri Abhyankar                                         0,
143986d161a7SShri Abhyankar                                         0,
144049a6ff4bSBarry Smith                                         MatGetColumnVector_MPIDense,
14413b49f96aSBarry Smith                                         MatMissingDiagonal_MPIDense,
144286d161a7SShri Abhyankar                                 /*114*/ 0,
144386d161a7SShri Abhyankar                                         0,
144486d161a7SShri Abhyankar                                         0,
144586d161a7SShri Abhyankar                                         0,
144686d161a7SShri Abhyankar                                         0,
144786d161a7SShri Abhyankar                                 /*119*/ 0,
144886d161a7SShri Abhyankar                                         0,
144986d161a7SShri Abhyankar                                         0,
14500716a85fSBarry Smith                                         0,
14510716a85fSBarry Smith                                         0,
14520716a85fSBarry Smith                                 /*124*/ 0,
14533964eb88SJed Brown                                         MatGetColumnNorms_MPIDense,
14543964eb88SJed Brown                                         0,
14553964eb88SJed Brown                                         0,
14563964eb88SJed Brown                                         0,
14573964eb88SJed Brown                                 /*129*/ 0,
14584222ddf1SHong Zhang                                         0,
14596718818eSStefano Zampini                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1460cb20be35SHong Zhang                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
14613964eb88SJed Brown                                         0,
14623964eb88SJed Brown                                 /*134*/ 0,
14633964eb88SJed Brown                                         0,
14643964eb88SJed Brown                                         0,
14653964eb88SJed Brown                                         0,
14663964eb88SJed Brown                                         0,
14673964eb88SJed Brown                                 /*139*/ 0,
1468f9426fe0SMark Adams                                         0,
146994e2cb23SJakub Kruzik                                         0,
147094e2cb23SJakub Kruzik                                         0,
147194e2cb23SJakub Kruzik                                         0,
14724222ddf1SHong Zhang                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
14734222ddf1SHong Zhang                                 /*145*/ 0,
14744222ddf1SHong Zhang                                         0,
14754222ddf1SHong Zhang                                         0
1476ba337c44SJed Brown };
14778965ea79SLois Curfman McInnes 
14787087cfbeSBarry Smith PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1479a23d5eceSKris Buschelman {
1480637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1481637a0070SStefano Zampini   PetscBool      iscuda;
1482dfbe8321SBarry Smith   PetscErrorCode ierr;
1483a23d5eceSKris Buschelman 
1484a23d5eceSKris Buschelman   PetscFunctionBegin;
1485616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
148634ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
148734ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1488637a0070SStefano Zampini   if (!a->A) {
1489f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
14903bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1491637a0070SStefano Zampini     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1492637a0070SStefano Zampini   }
1493637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1494637a0070SStefano Zampini   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1495637a0070SStefano Zampini   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1496637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
1497a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1498a23d5eceSKris Buschelman }
1499a23d5eceSKris Buschelman 
150065b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1501cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
15028baccfbdSHong Zhang {
15038ea901baSHong Zhang   Mat            mat_elemental;
15048ea901baSHong Zhang   PetscErrorCode ierr;
150532d7a744SHong Zhang   PetscScalar    *v;
150632d7a744SHong Zhang   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
15078ea901baSHong Zhang 
15088baccfbdSHong Zhang   PetscFunctionBegin;
1509378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1510378336b6SHong Zhang     mat_elemental = *newmat;
1511378336b6SHong Zhang     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1512378336b6SHong Zhang   } else {
1513378336b6SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1514378336b6SHong Zhang     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1515378336b6SHong Zhang     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1516378336b6SHong Zhang     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
151732d7a744SHong Zhang     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1518378336b6SHong Zhang   }
1519378336b6SHong Zhang 
152032d7a744SHong Zhang   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
152132d7a744SHong Zhang   for (i=0; i<N; i++) cols[i] = i;
152232d7a744SHong Zhang   for (i=0; i<m; i++) rows[i] = rstart + i;
15238ea901baSHong Zhang 
1524637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
152532d7a744SHong Zhang   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
152632d7a744SHong Zhang   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
15278ea901baSHong Zhang   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15288ea901baSHong Zhang   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152932d7a744SHong Zhang   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
153032d7a744SHong Zhang   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
15318ea901baSHong Zhang 
1532511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
153328be2f97SBarry Smith     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
15348ea901baSHong Zhang   } else {
15358ea901baSHong Zhang     *newmat = mat_elemental;
15368ea901baSHong Zhang   }
15378baccfbdSHong Zhang   PetscFunctionReturn(0);
15388baccfbdSHong Zhang }
153965b80a83SHong Zhang #endif
15408baccfbdSHong Zhang 
1541af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
154286aefd0dSHong Zhang {
154386aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
154486aefd0dSHong Zhang   PetscErrorCode ierr;
154586aefd0dSHong Zhang 
154686aefd0dSHong Zhang   PetscFunctionBegin;
154786aefd0dSHong Zhang   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
154886aefd0dSHong Zhang   PetscFunctionReturn(0);
154986aefd0dSHong Zhang }
155086aefd0dSHong Zhang 
1551af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
155286aefd0dSHong Zhang {
155386aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
155486aefd0dSHong Zhang   PetscErrorCode ierr;
155586aefd0dSHong Zhang 
155686aefd0dSHong Zhang   PetscFunctionBegin;
155786aefd0dSHong Zhang   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
155886aefd0dSHong Zhang   PetscFunctionReturn(0);
155986aefd0dSHong Zhang }
156086aefd0dSHong Zhang 
156194e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
156294e2cb23SJakub Kruzik {
156394e2cb23SJakub Kruzik   PetscErrorCode ierr;
156494e2cb23SJakub Kruzik   Mat_MPIDense   *mat;
156594e2cb23SJakub Kruzik   PetscInt       m,nloc,N;
156694e2cb23SJakub Kruzik 
156794e2cb23SJakub Kruzik   PetscFunctionBegin;
156894e2cb23SJakub Kruzik   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
156994e2cb23SJakub Kruzik   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
157094e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
157194e2cb23SJakub Kruzik     PetscInt sum;
157294e2cb23SJakub Kruzik 
157394e2cb23SJakub Kruzik     if (n == PETSC_DECIDE) {
157494e2cb23SJakub Kruzik       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
157594e2cb23SJakub Kruzik     }
157694e2cb23SJakub Kruzik     /* Check sum(n) = N */
157794e2cb23SJakub Kruzik     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
157894e2cb23SJakub Kruzik     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
157994e2cb23SJakub Kruzik 
158094e2cb23SJakub Kruzik     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
158194e2cb23SJakub Kruzik   }
158294e2cb23SJakub Kruzik 
158394e2cb23SJakub Kruzik   /* numeric phase */
158494e2cb23SJakub Kruzik   mat = (Mat_MPIDense*)(*outmat)->data;
158594e2cb23SJakub Kruzik   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
158694e2cb23SJakub Kruzik   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158794e2cb23SJakub Kruzik   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158894e2cb23SJakub Kruzik   PetscFunctionReturn(0);
158994e2cb23SJakub Kruzik }
159094e2cb23SJakub Kruzik 
1591637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1592637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1593637a0070SStefano Zampini {
1594637a0070SStefano Zampini   Mat            B;
1595637a0070SStefano Zampini   Mat_MPIDense   *m;
1596637a0070SStefano Zampini   PetscErrorCode ierr;
1597637a0070SStefano Zampini 
1598637a0070SStefano Zampini   PetscFunctionBegin;
1599637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1600637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1601637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1602637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1603637a0070SStefano Zampini   }
1604637a0070SStefano Zampini 
1605637a0070SStefano Zampini   B    = *newmat;
1606637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1607637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1608637a0070SStefano Zampini   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1609637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1610637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1611637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
16126718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
1613637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
16146718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1615637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1616637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1617637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1618637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1619637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1620637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1621637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1622637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1623d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
1624637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1625637a0070SStefano Zampini   if (m->A) {
1626637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1627637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1628637a0070SStefano Zampini   }
1629637a0070SStefano Zampini   B->ops->bindtocpu = NULL;
1630637a0070SStefano Zampini   B->offloadmask    = PETSC_OFFLOAD_CPU;
1631637a0070SStefano Zampini   PetscFunctionReturn(0);
1632637a0070SStefano Zampini }
1633637a0070SStefano Zampini 
1634637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1635637a0070SStefano Zampini {
1636637a0070SStefano Zampini   Mat            B;
1637637a0070SStefano Zampini   Mat_MPIDense   *m;
1638637a0070SStefano Zampini   PetscErrorCode ierr;
1639637a0070SStefano Zampini 
1640637a0070SStefano Zampini   PetscFunctionBegin;
1641637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1642637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1643637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1644637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1645637a0070SStefano Zampini   }
1646637a0070SStefano Zampini 
1647637a0070SStefano Zampini   B    = *newmat;
1648637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1649637a0070SStefano Zampini   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1650637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1651637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1652637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
16536718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1654637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
16556718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1656637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1657637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1658637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1659637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1660637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1661637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1662637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1663637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1664d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1665637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1666637a0070SStefano Zampini   if (m->A) {
1667637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1668637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1669637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_BOTH;
1670637a0070SStefano Zampini   } else {
1671637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1672637a0070SStefano Zampini   }
1673637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1674637a0070SStefano Zampini 
1675637a0070SStefano Zampini   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1676637a0070SStefano Zampini   PetscFunctionReturn(0);
1677637a0070SStefano Zampini }
1678637a0070SStefano Zampini #endif
1679637a0070SStefano Zampini 
16806947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
16816947451fSStefano Zampini {
16826947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
16836947451fSStefano Zampini   PetscErrorCode ierr;
16846947451fSStefano Zampini   PetscInt       lda;
16856947451fSStefano Zampini 
16866947451fSStefano Zampini   PetscFunctionBegin;
1687616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1688616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
16896947451fSStefano Zampini   if (!a->cvec) {
16906947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
16916947451fSStefano Zampini   }
16926947451fSStefano Zampini   a->vecinuse = col + 1;
16936947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
16946947451fSStefano Zampini   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
16956947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
16966947451fSStefano Zampini   *v   = a->cvec;
16976947451fSStefano Zampini   PetscFunctionReturn(0);
16986947451fSStefano Zampini }
16996947451fSStefano Zampini 
17006947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
17016947451fSStefano Zampini {
17026947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17036947451fSStefano Zampini   PetscErrorCode ierr;
17046947451fSStefano Zampini 
17056947451fSStefano Zampini   PetscFunctionBegin;
17065ea7661aSPierre Jolivet   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
17076947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
17086947451fSStefano Zampini   a->vecinuse = 0;
17096947451fSStefano Zampini   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17106947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17116947451fSStefano Zampini   *v   = NULL;
17126947451fSStefano Zampini   PetscFunctionReturn(0);
17136947451fSStefano Zampini }
17146947451fSStefano Zampini 
17156947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
17166947451fSStefano Zampini {
17176947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17186947451fSStefano Zampini   PetscErrorCode ierr;
17196947451fSStefano Zampini   PetscInt       lda;
17206947451fSStefano Zampini 
17216947451fSStefano Zampini   PetscFunctionBegin;
1722616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1723616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17246947451fSStefano Zampini   if (!a->cvec) {
17256947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
17266947451fSStefano Zampini   }
17276947451fSStefano Zampini   a->vecinuse = col + 1;
17286947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
17296947451fSStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
17306947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
17316947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
17326947451fSStefano Zampini   *v   = a->cvec;
17336947451fSStefano Zampini   PetscFunctionReturn(0);
17346947451fSStefano Zampini }
17356947451fSStefano Zampini 
17366947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
17376947451fSStefano Zampini {
17386947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17396947451fSStefano Zampini   PetscErrorCode ierr;
17406947451fSStefano Zampini 
17416947451fSStefano Zampini   PetscFunctionBegin;
1742616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1743616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
17446947451fSStefano Zampini   a->vecinuse = 0;
17456947451fSStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
17466947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
17476947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17486947451fSStefano Zampini   *v   = NULL;
17496947451fSStefano Zampini   PetscFunctionReturn(0);
17506947451fSStefano Zampini }
17516947451fSStefano Zampini 
17526947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
17536947451fSStefano Zampini {
17546947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17556947451fSStefano Zampini   PetscErrorCode ierr;
17566947451fSStefano Zampini   PetscInt       lda;
17576947451fSStefano Zampini 
17586947451fSStefano Zampini   PetscFunctionBegin;
1759616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1760616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17616947451fSStefano Zampini   if (!a->cvec) {
17626947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
17636947451fSStefano Zampini   }
17646947451fSStefano Zampini   a->vecinuse = col + 1;
17656947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
17666947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17676947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
17686947451fSStefano Zampini   *v   = a->cvec;
17696947451fSStefano Zampini   PetscFunctionReturn(0);
17706947451fSStefano Zampini }
17716947451fSStefano Zampini 
17726947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
17736947451fSStefano Zampini {
17746947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
17756947451fSStefano Zampini   PetscErrorCode ierr;
17766947451fSStefano Zampini 
17776947451fSStefano Zampini   PetscFunctionBegin;
1778616b8fbbSStefano Zampini   if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1779616b8fbbSStefano Zampini   if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
17806947451fSStefano Zampini   a->vecinuse = 0;
17816947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
17826947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
17836947451fSStefano Zampini   *v   = NULL;
17846947451fSStefano Zampini   PetscFunctionReturn(0);
17856947451fSStefano Zampini }
17866947451fSStefano Zampini 
17875ea7661aSPierre Jolivet PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
17885ea7661aSPierre Jolivet {
17895ea7661aSPierre Jolivet   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1790616b8fbbSStefano Zampini   Mat_MPIDense   *c;
17915ea7661aSPierre Jolivet   PetscErrorCode ierr;
1792616b8fbbSStefano Zampini   MPI_Comm       comm;
1793616b8fbbSStefano Zampini   PetscBool      setup = PETSC_FALSE;
17945ea7661aSPierre Jolivet 
17955ea7661aSPierre Jolivet   PetscFunctionBegin;
1796616b8fbbSStefano Zampini   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1797616b8fbbSStefano Zampini   if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1798616b8fbbSStefano Zampini   if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
17995ea7661aSPierre Jolivet   if (!a->cmat) {
1800616b8fbbSStefano Zampini     setup = PETSC_TRUE;
1801616b8fbbSStefano Zampini     ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
1802616b8fbbSStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
1803616b8fbbSStefano Zampini     ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1804616b8fbbSStefano Zampini     ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
1805616b8fbbSStefano Zampini     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1806616b8fbbSStefano Zampini     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
1807616b8fbbSStefano Zampini   } else if (cend-cbegin != a->cmat->cmap->N) {
1808616b8fbbSStefano Zampini     setup = PETSC_TRUE;
1809616b8fbbSStefano Zampini     ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
1810616b8fbbSStefano Zampini     ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
1811616b8fbbSStefano Zampini     ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
1812616b8fbbSStefano Zampini     ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
18135ea7661aSPierre Jolivet   }
1814616b8fbbSStefano Zampini   c = (Mat_MPIDense*)a->cmat->data;
1815616b8fbbSStefano Zampini   if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1816616b8fbbSStefano Zampini   ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
1817616b8fbbSStefano Zampini   if (setup) { /* do we really need this? */
1818616b8fbbSStefano Zampini     ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
1819616b8fbbSStefano Zampini   }
1820616b8fbbSStefano Zampini   a->cmat->preallocated = PETSC_TRUE;
1821616b8fbbSStefano Zampini   a->cmat->assembled = PETSC_TRUE;
18225ea7661aSPierre Jolivet   a->matinuse = cbegin + 1;
18235ea7661aSPierre Jolivet   *v = a->cmat;
18245ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18255ea7661aSPierre Jolivet }
18265ea7661aSPierre Jolivet 
18275ea7661aSPierre Jolivet PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
18285ea7661aSPierre Jolivet {
18295ea7661aSPierre Jolivet   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1830616b8fbbSStefano Zampini   Mat_MPIDense   *c;
18315ea7661aSPierre Jolivet   PetscErrorCode ierr;
18325ea7661aSPierre Jolivet 
18335ea7661aSPierre Jolivet   PetscFunctionBegin;
1834616b8fbbSStefano Zampini   if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1835616b8fbbSStefano Zampini   if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1836616b8fbbSStefano Zampini   if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
18375ea7661aSPierre Jolivet   a->matinuse = 0;
1838616b8fbbSStefano Zampini   c    = (Mat_MPIDense*)a->cmat->data;
1839616b8fbbSStefano Zampini   ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
18405ea7661aSPierre Jolivet   *v   = NULL;
18415ea7661aSPierre Jolivet   PetscFunctionReturn(0);
18425ea7661aSPierre Jolivet }
18435ea7661aSPierre Jolivet 
18448cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1845273d9f13SBarry Smith {
1846273d9f13SBarry Smith   Mat_MPIDense   *a;
1847dfbe8321SBarry Smith   PetscErrorCode ierr;
1848273d9f13SBarry Smith 
1849273d9f13SBarry Smith   PetscFunctionBegin;
1850b00a9115SJed Brown   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1851b0a32e0cSBarry Smith   mat->data = (void*)a;
1852273d9f13SBarry Smith   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1853273d9f13SBarry Smith 
1854273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1855273d9f13SBarry Smith 
1856273d9f13SBarry Smith   /* build cache for off array entries formed */
1857273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
18582205254eSKarl Rupp 
1859ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1860273d9f13SBarry Smith 
1861273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1862273d9f13SBarry Smith   a->lvec        = 0;
1863273d9f13SBarry Smith   a->Mvctx       = 0;
1864273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1865273d9f13SBarry Smith 
186649a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1867ad16ce7aSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
1868bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
18698572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
18708572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
18718572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
18726947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
18736947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
1874d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1875d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1876d5ea218eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
18776947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
18786947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
18796947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
18806947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
18816947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
18826947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
18835ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
18845ea7661aSPierre Jolivet   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
18858baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
18868baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
18878baccfbdSHong Zhang #endif
1888*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
1889*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
1890*d24d4204SJose E. Roman #endif
1891637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1892637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
1893637a0070SStefano Zampini #endif
1894bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
18954222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
18964222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
18976718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
18986718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
18996718818eSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
19006718818eSStefano Zampini #endif
19018949adfdSHong Zhang 
1902af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1903af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
190438aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1905273d9f13SBarry Smith   PetscFunctionReturn(0);
1906273d9f13SBarry Smith }
1907273d9f13SBarry Smith 
1908209238afSKris Buschelman /*MC
1909637a0070SStefano Zampini    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1910637a0070SStefano Zampini 
1911637a0070SStefano Zampini    Options Database Keys:
1912637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1913637a0070SStefano Zampini 
1914637a0070SStefano Zampini   Level: beginner
1915637a0070SStefano Zampini 
1916637a0070SStefano Zampini .seealso:
1917637a0070SStefano Zampini 
1918637a0070SStefano Zampini M*/
1919637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1920637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1921637a0070SStefano Zampini {
1922637a0070SStefano Zampini   PetscErrorCode ierr;
1923637a0070SStefano Zampini 
1924637a0070SStefano Zampini   PetscFunctionBegin;
1925637a0070SStefano Zampini   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
1926637a0070SStefano Zampini   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
1927637a0070SStefano Zampini   PetscFunctionReturn(0);
1928637a0070SStefano Zampini }
1929637a0070SStefano Zampini #endif
1930637a0070SStefano Zampini 
1931637a0070SStefano Zampini /*MC
1932002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1933209238afSKris Buschelman 
1934209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1935209238afSKris Buschelman    and MATMPIDENSE otherwise.
1936209238afSKris Buschelman 
1937209238afSKris Buschelman    Options Database Keys:
1938209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1939209238afSKris Buschelman 
1940209238afSKris Buschelman   Level: beginner
1941209238afSKris Buschelman 
194201b82886SBarry Smith 
19436947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
19446947451fSStefano Zampini M*/
19456947451fSStefano Zampini 
19466947451fSStefano Zampini /*MC
19476947451fSStefano Zampini    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
19486947451fSStefano Zampini 
19496947451fSStefano Zampini    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
19506947451fSStefano Zampini    and MATMPIDENSECUDA otherwise.
19516947451fSStefano Zampini 
19526947451fSStefano Zampini    Options Database Keys:
19536947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
19546947451fSStefano Zampini 
19556947451fSStefano Zampini   Level: beginner
19566947451fSStefano Zampini 
19576947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1958209238afSKris Buschelman M*/
1959209238afSKris Buschelman 
1960273d9f13SBarry Smith /*@C
1961273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1962273d9f13SBarry Smith 
1963273d9f13SBarry Smith    Not collective
1964273d9f13SBarry Smith 
1965273d9f13SBarry Smith    Input Parameters:
19661c4f3114SJed Brown .  B - the matrix
19670298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1968273d9f13SBarry Smith    to control all matrix memory allocation.
1969273d9f13SBarry Smith 
1970273d9f13SBarry Smith    Notes:
1971273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1972273d9f13SBarry Smith    storage by columns.
1973273d9f13SBarry Smith 
1974273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1975273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
19760298fd71SBarry Smith    set data=NULL.
1977273d9f13SBarry Smith 
1978273d9f13SBarry Smith    Level: intermediate
1979273d9f13SBarry Smith 
1980273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1981273d9f13SBarry Smith @*/
19821c4f3114SJed Brown PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1983273d9f13SBarry Smith {
19844ac538c5SBarry Smith   PetscErrorCode ierr;
1985273d9f13SBarry Smith 
1986273d9f13SBarry Smith   PetscFunctionBegin;
1987d5ea218eSStefano Zampini   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
19881c4f3114SJed Brown   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1989273d9f13SBarry Smith   PetscFunctionReturn(0);
1990273d9f13SBarry Smith }
1991273d9f13SBarry Smith 
1992d3042a70SBarry Smith /*@
1993637a0070SStefano Zampini    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1994d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1995d3042a70SBarry Smith    into a matrix
1996d3042a70SBarry Smith 
1997d3042a70SBarry Smith    Not Collective
1998d3042a70SBarry Smith 
1999d3042a70SBarry Smith    Input Parameters:
2000d3042a70SBarry Smith +  mat - the matrix
2001d3042a70SBarry Smith -  array - the array in column major order
2002d3042a70SBarry Smith 
2003d3042a70SBarry Smith    Notes:
2004d3042a70SBarry Smith    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2005d3042a70SBarry Smith    freed when the matrix is destroyed.
2006d3042a70SBarry Smith 
2007d3042a70SBarry Smith    Level: developer
2008d3042a70SBarry Smith 
2009d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2010d3042a70SBarry Smith 
2011d3042a70SBarry Smith @*/
2012637a0070SStefano Zampini PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2013d3042a70SBarry Smith {
2014d3042a70SBarry Smith   PetscErrorCode ierr;
2015637a0070SStefano Zampini 
2016d3042a70SBarry Smith   PetscFunctionBegin;
2017d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2018d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2019d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2020637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2021637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2022637a0070SStefano Zampini #endif
2023d3042a70SBarry Smith   PetscFunctionReturn(0);
2024d3042a70SBarry Smith }
2025d3042a70SBarry Smith 
2026d3042a70SBarry Smith /*@
2027d3042a70SBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2028d3042a70SBarry Smith 
2029d3042a70SBarry Smith    Not Collective
2030d3042a70SBarry Smith 
2031d3042a70SBarry Smith    Input Parameters:
2032d3042a70SBarry Smith .  mat - the matrix
2033d3042a70SBarry Smith 
2034d3042a70SBarry Smith    Notes:
2035d3042a70SBarry Smith    You can only call this after a call to MatDensePlaceArray()
2036d3042a70SBarry Smith 
2037d3042a70SBarry Smith    Level: developer
2038d3042a70SBarry Smith 
2039d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2040d3042a70SBarry Smith 
2041d3042a70SBarry Smith @*/
2042d3042a70SBarry Smith PetscErrorCode  MatDenseResetArray(Mat mat)
2043d3042a70SBarry Smith {
2044d3042a70SBarry Smith   PetscErrorCode ierr;
2045637a0070SStefano Zampini 
2046d3042a70SBarry Smith   PetscFunctionBegin;
2047d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2048d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2049d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2050d3042a70SBarry Smith   PetscFunctionReturn(0);
2051d3042a70SBarry Smith }
2052d3042a70SBarry Smith 
2053d5ea218eSStefano Zampini /*@
2054d5ea218eSStefano Zampini    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2055d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2056d5ea218eSStefano Zampini    into a matrix
2057d5ea218eSStefano Zampini 
2058d5ea218eSStefano Zampini    Not Collective
2059d5ea218eSStefano Zampini 
2060d5ea218eSStefano Zampini    Input Parameters:
2061d5ea218eSStefano Zampini +  mat - the matrix
2062d5ea218eSStefano Zampini -  array - the array in column major order
2063d5ea218eSStefano Zampini 
2064d5ea218eSStefano Zampini    Notes:
2065d5ea218eSStefano Zampini    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2066d5ea218eSStefano Zampini    freed by the user. It will be freed when the matrix is destroyed.
2067d5ea218eSStefano Zampini 
2068d5ea218eSStefano Zampini    Level: developer
2069d5ea218eSStefano Zampini 
2070d5ea218eSStefano Zampini .seealso: MatDenseGetArray(), VecReplaceArray()
2071d5ea218eSStefano Zampini @*/
2072d5ea218eSStefano Zampini PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2073d5ea218eSStefano Zampini {
2074d5ea218eSStefano Zampini   PetscErrorCode ierr;
2075d5ea218eSStefano Zampini 
2076d5ea218eSStefano Zampini   PetscFunctionBegin;
2077d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2078d5ea218eSStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2079d5ea218eSStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2080d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA)
2081d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
2082d5ea218eSStefano Zampini #endif
2083d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2084d5ea218eSStefano Zampini }
2085d5ea218eSStefano Zampini 
2086637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
20878965ea79SLois Curfman McInnes /*@C
2088637a0070SStefano Zampini    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2089637a0070SStefano Zampini    array provided by the user. This is useful to avoid copying an array
2090637a0070SStefano Zampini    into a matrix
2091637a0070SStefano Zampini 
2092637a0070SStefano Zampini    Not Collective
2093637a0070SStefano Zampini 
2094637a0070SStefano Zampini    Input Parameters:
2095637a0070SStefano Zampini +  mat - the matrix
2096637a0070SStefano Zampini -  array - the array in column major order
2097637a0070SStefano Zampini 
2098637a0070SStefano Zampini    Notes:
2099637a0070SStefano Zampini    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2100637a0070SStefano Zampini    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2101637a0070SStefano Zampini 
2102637a0070SStefano Zampini    Level: developer
2103637a0070SStefano Zampini 
2104637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2105637a0070SStefano Zampini @*/
2106637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2107637a0070SStefano Zampini {
2108637a0070SStefano Zampini   PetscErrorCode ierr;
2109637a0070SStefano Zampini 
2110637a0070SStefano Zampini   PetscFunctionBegin;
2111d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2112637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2113637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2114637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2115637a0070SStefano Zampini   PetscFunctionReturn(0);
2116637a0070SStefano Zampini }
2117637a0070SStefano Zampini 
2118637a0070SStefano Zampini /*@C
2119637a0070SStefano Zampini    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2120637a0070SStefano Zampini 
2121637a0070SStefano Zampini    Not Collective
2122637a0070SStefano Zampini 
2123637a0070SStefano Zampini    Input Parameters:
2124637a0070SStefano Zampini .  mat - the matrix
2125637a0070SStefano Zampini 
2126637a0070SStefano Zampini    Notes:
2127637a0070SStefano Zampini    You can only call this after a call to MatDenseCUDAPlaceArray()
2128637a0070SStefano Zampini 
2129637a0070SStefano Zampini    Level: developer
2130637a0070SStefano Zampini 
2131637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2132637a0070SStefano Zampini 
2133637a0070SStefano Zampini @*/
2134637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2135637a0070SStefano Zampini {
2136637a0070SStefano Zampini   PetscErrorCode ierr;
2137637a0070SStefano Zampini 
2138637a0070SStefano Zampini   PetscFunctionBegin;
2139d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2140637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2141637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2142637a0070SStefano Zampini   PetscFunctionReturn(0);
2143637a0070SStefano Zampini }
2144637a0070SStefano Zampini 
2145637a0070SStefano Zampini /*@C
2146d5ea218eSStefano Zampini    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2147d5ea218eSStefano Zampini    array provided by the user. This is useful to avoid copying an array
2148d5ea218eSStefano Zampini    into a matrix
2149d5ea218eSStefano Zampini 
2150d5ea218eSStefano Zampini    Not Collective
2151d5ea218eSStefano Zampini 
2152d5ea218eSStefano Zampini    Input Parameters:
2153d5ea218eSStefano Zampini +  mat - the matrix
2154d5ea218eSStefano Zampini -  array - the array in column major order
2155d5ea218eSStefano Zampini 
2156d5ea218eSStefano Zampini    Notes:
2157d5ea218eSStefano Zampini    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2158d5ea218eSStefano Zampini    The memory passed in CANNOT be freed by the user. It will be freed
2159d5ea218eSStefano Zampini    when the matrix is destroyed. The array should respect the matrix leading dimension.
2160d5ea218eSStefano Zampini 
2161d5ea218eSStefano Zampini    Level: developer
2162d5ea218eSStefano Zampini 
2163d5ea218eSStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2164d5ea218eSStefano Zampini @*/
2165d5ea218eSStefano Zampini PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2166d5ea218eSStefano Zampini {
2167d5ea218eSStefano Zampini   PetscErrorCode ierr;
2168d5ea218eSStefano Zampini 
2169d5ea218eSStefano Zampini   PetscFunctionBegin;
2170d5ea218eSStefano Zampini   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2171d5ea218eSStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2172d5ea218eSStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2173d5ea218eSStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
2174d5ea218eSStefano Zampini   PetscFunctionReturn(0);
2175d5ea218eSStefano Zampini }
2176d5ea218eSStefano Zampini 
2177d5ea218eSStefano Zampini /*@C
2178637a0070SStefano Zampini    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2179637a0070SStefano Zampini 
2180637a0070SStefano Zampini    Not Collective
2181637a0070SStefano Zampini 
2182637a0070SStefano Zampini    Input Parameters:
2183637a0070SStefano Zampini .  A - the matrix
2184637a0070SStefano Zampini 
2185637a0070SStefano Zampini    Output Parameters
2186637a0070SStefano Zampini .  array - the GPU array in column major order
2187637a0070SStefano Zampini 
2188637a0070SStefano Zampini    Notes:
2189637a0070SStefano Zampini    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2190637a0070SStefano Zampini 
2191637a0070SStefano Zampini    Level: developer
2192637a0070SStefano Zampini 
2193637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2194637a0070SStefano Zampini @*/
2195637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2196637a0070SStefano Zampini {
2197637a0070SStefano Zampini   PetscErrorCode ierr;
2198637a0070SStefano Zampini 
2199637a0070SStefano Zampini   PetscFunctionBegin;
2200d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2201637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2202637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2203637a0070SStefano Zampini   PetscFunctionReturn(0);
2204637a0070SStefano Zampini }
2205637a0070SStefano Zampini 
2206637a0070SStefano Zampini /*@C
2207637a0070SStefano Zampini    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2208637a0070SStefano Zampini 
2209637a0070SStefano Zampini    Not Collective
2210637a0070SStefano Zampini 
2211637a0070SStefano Zampini    Input Parameters:
2212637a0070SStefano Zampini +  A - the matrix
2213637a0070SStefano Zampini -  array - the GPU array in column major order
2214637a0070SStefano Zampini 
2215637a0070SStefano Zampini    Notes:
2216637a0070SStefano Zampini 
2217637a0070SStefano Zampini    Level: developer
2218637a0070SStefano Zampini 
2219637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2220637a0070SStefano Zampini @*/
2221637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2222637a0070SStefano Zampini {
2223637a0070SStefano Zampini   PetscErrorCode ierr;
2224637a0070SStefano Zampini 
2225637a0070SStefano Zampini   PetscFunctionBegin;
2226d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2227637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2228637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2229637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2230637a0070SStefano Zampini   PetscFunctionReturn(0);
2231637a0070SStefano Zampini }
2232637a0070SStefano Zampini 
2233637a0070SStefano Zampini /*@C
2234637a0070SStefano Zampini    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2235637a0070SStefano Zampini 
2236637a0070SStefano Zampini    Not Collective
2237637a0070SStefano Zampini 
2238637a0070SStefano Zampini    Input Parameters:
2239637a0070SStefano Zampini .  A - the matrix
2240637a0070SStefano Zampini 
2241637a0070SStefano Zampini    Output Parameters
2242637a0070SStefano Zampini .  array - the GPU array in column major order
2243637a0070SStefano Zampini 
2244637a0070SStefano Zampini    Notes:
2245637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2246637a0070SStefano Zampini 
2247637a0070SStefano Zampini    Level: developer
2248637a0070SStefano Zampini 
2249637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2250637a0070SStefano Zampini @*/
2251637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2252637a0070SStefano Zampini {
2253637a0070SStefano Zampini   PetscErrorCode ierr;
2254637a0070SStefano Zampini 
2255637a0070SStefano Zampini   PetscFunctionBegin;
2256d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2257637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2258637a0070SStefano Zampini   PetscFunctionReturn(0);
2259637a0070SStefano Zampini }
2260637a0070SStefano Zampini 
2261637a0070SStefano Zampini /*@C
2262637a0070SStefano Zampini    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2263637a0070SStefano Zampini 
2264637a0070SStefano Zampini    Not Collective
2265637a0070SStefano Zampini 
2266637a0070SStefano Zampini    Input Parameters:
2267637a0070SStefano Zampini +  A - the matrix
2268637a0070SStefano Zampini -  array - the GPU array in column major order
2269637a0070SStefano Zampini 
2270637a0070SStefano Zampini    Notes:
2271637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2272637a0070SStefano Zampini 
2273637a0070SStefano Zampini    Level: developer
2274637a0070SStefano Zampini 
2275637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2276637a0070SStefano Zampini @*/
2277637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2278637a0070SStefano Zampini {
2279637a0070SStefano Zampini   PetscErrorCode ierr;
2280637a0070SStefano Zampini 
2281637a0070SStefano Zampini   PetscFunctionBegin;
2282637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2283637a0070SStefano Zampini   PetscFunctionReturn(0);
2284637a0070SStefano Zampini }
2285637a0070SStefano Zampini 
2286637a0070SStefano Zampini /*@C
2287637a0070SStefano Zampini    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2288637a0070SStefano Zampini 
2289637a0070SStefano Zampini    Not Collective
2290637a0070SStefano Zampini 
2291637a0070SStefano Zampini    Input Parameters:
2292637a0070SStefano Zampini .  A - the matrix
2293637a0070SStefano Zampini 
2294637a0070SStefano Zampini    Output Parameters
2295637a0070SStefano Zampini .  array - the GPU array in column major order
2296637a0070SStefano Zampini 
2297637a0070SStefano Zampini    Notes:
2298637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2299637a0070SStefano Zampini 
2300637a0070SStefano Zampini    Level: developer
2301637a0070SStefano Zampini 
2302637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2303637a0070SStefano Zampini @*/
2304637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2305637a0070SStefano Zampini {
2306637a0070SStefano Zampini   PetscErrorCode ierr;
2307637a0070SStefano Zampini 
2308637a0070SStefano Zampini   PetscFunctionBegin;
2309d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2310637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2311637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2312637a0070SStefano Zampini   PetscFunctionReturn(0);
2313637a0070SStefano Zampini }
2314637a0070SStefano Zampini 
2315637a0070SStefano Zampini /*@C
2316637a0070SStefano Zampini    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2317637a0070SStefano Zampini 
2318637a0070SStefano Zampini    Not Collective
2319637a0070SStefano Zampini 
2320637a0070SStefano Zampini    Input Parameters:
2321637a0070SStefano Zampini +  A - the matrix
2322637a0070SStefano Zampini -  array - the GPU array in column major order
2323637a0070SStefano Zampini 
2324637a0070SStefano Zampini    Notes:
2325637a0070SStefano Zampini 
2326637a0070SStefano Zampini    Level: developer
2327637a0070SStefano Zampini 
2328637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2329637a0070SStefano Zampini @*/
2330637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2331637a0070SStefano Zampini {
2332637a0070SStefano Zampini   PetscErrorCode ierr;
2333637a0070SStefano Zampini 
2334637a0070SStefano Zampini   PetscFunctionBegin;
2335d5ea218eSStefano Zampini   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2336637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2337637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2338637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2339637a0070SStefano Zampini   PetscFunctionReturn(0);
2340637a0070SStefano Zampini }
2341637a0070SStefano Zampini #endif
2342637a0070SStefano Zampini 
2343637a0070SStefano Zampini /*@C
2344637a0070SStefano Zampini    MatCreateDense - Creates a matrix in dense format.
23458965ea79SLois Curfman McInnes 
2346d083f849SBarry Smith    Collective
2347db81eaa0SLois Curfman McInnes 
23488965ea79SLois Curfman McInnes    Input Parameters:
2349db81eaa0SLois Curfman McInnes +  comm - MPI communicator
23508965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2351db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
23528965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2353db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
23546cfe35ddSJose E. Roman -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2355dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
23568965ea79SLois Curfman McInnes 
23578965ea79SLois Curfman McInnes    Output Parameter:
2358477f1c0bSLois Curfman McInnes .  A - the matrix
23598965ea79SLois Curfman McInnes 
2360b259b22eSLois Curfman McInnes    Notes:
236139ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
236239ddd567SLois Curfman McInnes    storage by columns.
23638965ea79SLois Curfman McInnes 
236418f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
236518f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
23666cfe35ddSJose E. Roman    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
236718f449edSLois Curfman McInnes 
23688965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
23698965ea79SLois Curfman McInnes    (possibly both).
23708965ea79SLois Curfman McInnes 
2371027ccd11SLois Curfman McInnes    Level: intermediate
2372027ccd11SLois Curfman McInnes 
237339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
23748965ea79SLois Curfman McInnes @*/
237569b1f4b7SBarry Smith PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
23768965ea79SLois Curfman McInnes {
23776849ba73SBarry Smith   PetscErrorCode ierr;
237813f74950SBarry Smith   PetscMPIInt    size;
23798965ea79SLois Curfman McInnes 
23803a40ed3dSBarry Smith   PetscFunctionBegin;
2381f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
23828491ab44SLisandro Dalcin   PetscValidLogicalCollectiveBool(*A,!!data,6);
2383f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2384273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2385273d9f13SBarry Smith   if (size > 1) {
2386273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2387273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
23886cfe35ddSJose E. Roman     if (data) {  /* user provided data array, so no need to assemble */
23896cfe35ddSJose E. Roman       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
23906cfe35ddSJose E. Roman       (*A)->assembled = PETSC_TRUE;
23916cfe35ddSJose E. Roman     }
2392273d9f13SBarry Smith   } else {
2393273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2394273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
23958c469469SLois Curfman McInnes   }
23963a40ed3dSBarry Smith   PetscFunctionReturn(0);
23978965ea79SLois Curfman McInnes }
23988965ea79SLois Curfman McInnes 
2399637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2400637a0070SStefano Zampini /*@C
2401637a0070SStefano Zampini    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2402637a0070SStefano Zampini 
2403637a0070SStefano Zampini    Collective
2404637a0070SStefano Zampini 
2405637a0070SStefano Zampini    Input Parameters:
2406637a0070SStefano Zampini +  comm - MPI communicator
2407637a0070SStefano Zampini .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2408637a0070SStefano Zampini .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2409637a0070SStefano Zampini .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2410637a0070SStefano Zampini .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2411637a0070SStefano Zampini -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2412637a0070SStefano Zampini    to control matrix memory allocation.
2413637a0070SStefano Zampini 
2414637a0070SStefano Zampini    Output Parameter:
2415637a0070SStefano Zampini .  A - the matrix
2416637a0070SStefano Zampini 
2417637a0070SStefano Zampini    Notes:
2418637a0070SStefano Zampini 
2419637a0070SStefano Zampini    Level: intermediate
2420637a0070SStefano Zampini 
2421637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense()
2422637a0070SStefano Zampini @*/
2423637a0070SStefano Zampini PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2424637a0070SStefano Zampini {
2425637a0070SStefano Zampini   PetscErrorCode ierr;
2426637a0070SStefano Zampini   PetscMPIInt    size;
2427637a0070SStefano Zampini 
2428637a0070SStefano Zampini   PetscFunctionBegin;
2429637a0070SStefano Zampini   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2430637a0070SStefano Zampini   PetscValidLogicalCollectiveBool(*A,!!data,6);
2431637a0070SStefano Zampini   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2432637a0070SStefano Zampini   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2433637a0070SStefano Zampini   if (size > 1) {
2434637a0070SStefano Zampini     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2435637a0070SStefano Zampini     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2436637a0070SStefano Zampini     if (data) {  /* user provided data array, so no need to assemble */
2437637a0070SStefano Zampini       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2438637a0070SStefano Zampini       (*A)->assembled = PETSC_TRUE;
2439637a0070SStefano Zampini     }
2440637a0070SStefano Zampini   } else {
2441637a0070SStefano Zampini     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2442637a0070SStefano Zampini     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2443637a0070SStefano Zampini   }
2444637a0070SStefano Zampini   PetscFunctionReturn(0);
2445637a0070SStefano Zampini }
2446637a0070SStefano Zampini #endif
2447637a0070SStefano Zampini 
24486849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
24498965ea79SLois Curfman McInnes {
24508965ea79SLois Curfman McInnes   Mat            mat;
24513501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2452dfbe8321SBarry Smith   PetscErrorCode ierr;
24538965ea79SLois Curfman McInnes 
24543a40ed3dSBarry Smith   PetscFunctionBegin;
24558965ea79SLois Curfman McInnes   *newmat = 0;
2456ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2457d0f46423SBarry Smith   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
24587adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2459834f8fabSBarry Smith   a       = (Mat_MPIDense*)mat->data;
24605aa7edbeSHong Zhang 
2461d5f3da31SBarry Smith   mat->factortype   = A->factortype;
2462c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
2463273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24648965ea79SLois Curfman McInnes 
2465e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
24663782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
2467e04c1aa4SHong Zhang 
24681e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
24691e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
24708965ea79SLois Curfman McInnes 
24715609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
24723bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2473637a0070SStefano Zampini   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
247401b82886SBarry Smith 
24758965ea79SLois Curfman McInnes   *newmat = mat;
24763a40ed3dSBarry Smith   PetscFunctionReturn(0);
24778965ea79SLois Curfman McInnes }
24788965ea79SLois Curfman McInnes 
2479eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2480eb91f321SVaclav Hapla {
2481eb91f321SVaclav Hapla   PetscErrorCode ierr;
248287d5ce66SSatish Balay   PetscBool      isbinary;
248387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
248487d5ce66SSatish Balay   PetscBool      ishdf5;
248587d5ce66SSatish Balay #endif
2486eb91f321SVaclav Hapla 
2487eb91f321SVaclav Hapla   PetscFunctionBegin;
2488eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2489eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2490eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
2491eb91f321SVaclav Hapla   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2492eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
249387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
2494eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
249587d5ce66SSatish Balay #endif
2496eb91f321SVaclav Hapla   if (isbinary) {
24978491ab44SLisandro Dalcin     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2498eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
249987d5ce66SSatish Balay   } else if (ishdf5) {
2500eb91f321SVaclav Hapla     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2501eb91f321SVaclav Hapla #endif
250287d5ce66SSatish Balay   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2503eb91f321SVaclav Hapla   PetscFunctionReturn(0);
2504eb91f321SVaclav Hapla }
2505eb91f321SVaclav Hapla 
25066718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
25076e4ee0c6SHong Zhang {
25086e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
25096e4ee0c6SHong Zhang   Mat            a,b;
2510ace3abfcSBarry Smith   PetscBool      flg;
25116e4ee0c6SHong Zhang   PetscErrorCode ierr;
251290ace30eSBarry Smith 
25136e4ee0c6SHong Zhang   PetscFunctionBegin;
25146e4ee0c6SHong Zhang   a    = matA->A;
25156e4ee0c6SHong Zhang   b    = matB->A;
25166e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2517b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
25186e4ee0c6SHong Zhang   PetscFunctionReturn(0);
25196e4ee0c6SHong Zhang }
252090ace30eSBarry Smith 
25216718818eSStefano Zampini PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2522baa3c1c6SHong Zhang {
2523baa3c1c6SHong Zhang   PetscErrorCode        ierr;
25246718818eSStefano Zampini   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2525baa3c1c6SHong Zhang 
2526baa3c1c6SHong Zhang   PetscFunctionBegin;
2527637a0070SStefano Zampini   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2528637a0070SStefano Zampini   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2529baa3c1c6SHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
2530baa3c1c6SHong Zhang   PetscFunctionReturn(0);
2531baa3c1c6SHong Zhang }
2532baa3c1c6SHong Zhang 
25336718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2534cc48ffa7SToby Isaac {
2535cc48ffa7SToby Isaac   PetscErrorCode        ierr;
25366718818eSStefano Zampini   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2537cc48ffa7SToby Isaac 
2538cc48ffa7SToby Isaac   PetscFunctionBegin;
2539cc48ffa7SToby Isaac   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2540faa55883SToby Isaac   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2541cc48ffa7SToby Isaac   ierr = PetscFree(abt);CHKERRQ(ierr);
2542cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2543cc48ffa7SToby Isaac }
2544cc48ffa7SToby Isaac 
25456718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2546cb20be35SHong Zhang {
2547baa3c1c6SHong Zhang   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
25486718818eSStefano Zampini   Mat_TransMatMultDense *atb;
2549cb20be35SHong Zhang   PetscErrorCode        ierr;
2550cb20be35SHong Zhang   MPI_Comm              comm;
25516718818eSStefano Zampini   PetscMPIInt           size,*recvcounts;
25526718818eSStefano Zampini   PetscScalar           *carray,*sendbuf;
2553637a0070SStefano Zampini   const PetscScalar     *atbarray;
2554d5017740SHong Zhang   PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2555e68c0b26SHong Zhang   const PetscInt        *ranges;
2556cb20be35SHong Zhang 
2557cb20be35SHong Zhang   PetscFunctionBegin;
25586718818eSStefano Zampini   MatCheckProduct(C,3);
25596718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
25606718818eSStefano Zampini   atb = (Mat_TransMatMultDense *)C->product->data;
25616718818eSStefano Zampini   recvcounts = atb->recvcounts;
25626718818eSStefano Zampini   sendbuf = atb->sendbuf;
25636718818eSStefano Zampini 
2564cb20be35SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2565cb20be35SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2566e68c0b26SHong Zhang 
2567c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
2568637a0070SStefano Zampini   ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);
2569cb20be35SHong Zhang 
2570cb20be35SHong Zhang   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2571cb20be35SHong Zhang 
2572660d5466SHong Zhang   /* arrange atbarray into sendbuf */
2573637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2574637a0070SStefano Zampini   for (proc=0, k=0; proc<size; proc++) {
2575baa3c1c6SHong Zhang     for (j=0; j<cN; j++) {
2576c5ef1628SHong Zhang       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2577cb20be35SHong Zhang     }
2578cb20be35SHong Zhang   }
2579637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2580637a0070SStefano Zampini 
2581c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
25820acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
25833462b7efSHong Zhang   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
25840acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
25850acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25860acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587cb20be35SHong Zhang   PetscFunctionReturn(0);
2588cb20be35SHong Zhang }
2589cb20be35SHong Zhang 
25906718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2591cb20be35SHong Zhang {
2592cb20be35SHong Zhang   PetscErrorCode        ierr;
2593cb20be35SHong Zhang   MPI_Comm              comm;
2594baa3c1c6SHong Zhang   PetscMPIInt           size;
2595660d5466SHong Zhang   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2596baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
25976718818eSStefano Zampini   PetscBool             cisdense;
25980acaeb71SStefano Zampini   PetscInt              i;
25990acaeb71SStefano Zampini   const PetscInt        *ranges;
2600cb20be35SHong Zhang 
2601cb20be35SHong Zhang   PetscFunctionBegin;
26026718818eSStefano Zampini   MatCheckProduct(C,3);
26036718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2604baa3c1c6SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2605cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2606cb20be35SHong Zhang     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2607cb20be35SHong Zhang   }
2608cb20be35SHong Zhang 
26094222ddf1SHong Zhang   /* create matrix product C */
26106718818eSStefano Zampini   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
26116718818eSStefano Zampini   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
26126718818eSStefano Zampini   if (!cisdense) {
26136718818eSStefano Zampini     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
26146718818eSStefano Zampini   }
261518992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
2616baa3c1c6SHong Zhang 
26174222ddf1SHong Zhang   /* create data structure for reuse C */
2618baa3c1c6SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2619baa3c1c6SHong Zhang   ierr = PetscNew(&atb);CHKERRQ(ierr);
26204222ddf1SHong Zhang   cM   = C->rmap->N;
26216718818eSStefano Zampini   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
26220acaeb71SStefano Zampini   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
26230acaeb71SStefano Zampini   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2624baa3c1c6SHong Zhang 
26256718818eSStefano Zampini   C->product->data    = atb;
26266718818eSStefano Zampini   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2627cb20be35SHong Zhang   PetscFunctionReturn(0);
2628cb20be35SHong Zhang }
2629cb20be35SHong Zhang 
26304222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2631cb20be35SHong Zhang {
2632cb20be35SHong Zhang   PetscErrorCode        ierr;
2633cc48ffa7SToby Isaac   MPI_Comm              comm;
2634cc48ffa7SToby Isaac   PetscMPIInt           i, size;
2635cc48ffa7SToby Isaac   PetscInt              maxRows, bufsiz;
2636cc48ffa7SToby Isaac   PetscMPIInt           tag;
26374222ddf1SHong Zhang   PetscInt              alg;
2638cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
26394222ddf1SHong Zhang   Mat_Product           *product = C->product;
26404222ddf1SHong Zhang   PetscBool             flg;
2641cc48ffa7SToby Isaac 
2642cc48ffa7SToby Isaac   PetscFunctionBegin;
26436718818eSStefano Zampini   MatCheckProduct(C,4);
26446718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
26454222ddf1SHong Zhang   /* check local size of A and B */
2646637a0070SStefano Zampini   if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2647cc48ffa7SToby Isaac 
26484222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
2649637a0070SStefano Zampini   alg  = flg ? 0 : 1;
2650cc48ffa7SToby Isaac 
26514222ddf1SHong Zhang   /* setup matrix product C */
26524222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
26534222ddf1SHong Zhang   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
265418992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
26554222ddf1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
26564222ddf1SHong Zhang 
26574222ddf1SHong Zhang   /* create data structure for reuse C */
26584222ddf1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2659cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2660cc48ffa7SToby Isaac   ierr = PetscNew(&abt);CHKERRQ(ierr);
2661cc48ffa7SToby Isaac   abt->tag = tag;
2662faa55883SToby Isaac   abt->alg = alg;
2663faa55883SToby Isaac   switch (alg) {
26644222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2665cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2666cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
2667cc48ffa7SToby Isaac     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2668faa55883SToby Isaac     break;
26694222ddf1SHong Zhang   default: /* alg: "allgatherv" */
2670faa55883SToby Isaac     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2671faa55883SToby Isaac     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2672faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2673faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2674faa55883SToby Isaac     break;
2675faa55883SToby Isaac   }
2676cc48ffa7SToby Isaac 
26776718818eSStefano Zampini   C->product->data    = abt;
26786718818eSStefano Zampini   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
26794222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2680cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2681cc48ffa7SToby Isaac }
2682cc48ffa7SToby Isaac 
2683faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2684cc48ffa7SToby Isaac {
2685cc48ffa7SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
26866718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2687cc48ffa7SToby Isaac   PetscErrorCode        ierr;
2688cc48ffa7SToby Isaac   MPI_Comm              comm;
2689cc48ffa7SToby Isaac   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2690637a0070SStefano Zampini   PetscScalar           *sendbuf, *recvbuf=0, *cv;
2691cc48ffa7SToby Isaac   PetscInt              i,cK=A->cmap->N,k,j,bn;
2692cc48ffa7SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2693637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2694637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, blda, clda;
2695cc48ffa7SToby Isaac   MPI_Request           reqs[2];
2696cc48ffa7SToby Isaac   const PetscInt        *ranges;
2697cc48ffa7SToby Isaac 
2698cc48ffa7SToby Isaac   PetscFunctionBegin;
26996718818eSStefano Zampini   MatCheckProduct(C,3);
27006718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
27016718818eSStefano Zampini   abt  = (Mat_MatTransMultDense*)C->product->data;
27026718818eSStefano Zampini   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2703cc48ffa7SToby Isaac   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2704cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2705637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2706637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
27070acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2708637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2709637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2710637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
2711637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
2712637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2713637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2714cc48ffa7SToby Isaac   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2715cc48ffa7SToby Isaac   bn   = B->rmap->n;
2716637a0070SStefano Zampini   if (blda == bn) {
2717637a0070SStefano Zampini     sendbuf = (PetscScalar*)bv;
2718cc48ffa7SToby Isaac   } else {
2719cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2720cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2721cc48ffa7SToby Isaac       for (j = 0; j < bn; j++, k++) {
2722637a0070SStefano Zampini         sendbuf[k] = bv[i * blda + j];
2723cc48ffa7SToby Isaac       }
2724cc48ffa7SToby Isaac     }
2725cc48ffa7SToby Isaac   }
2726cc48ffa7SToby Isaac   if (size > 1) {
2727cc48ffa7SToby Isaac     sendto = (rank + size - 1) % size;
2728cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2729cc48ffa7SToby Isaac   } else {
2730cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2731cc48ffa7SToby Isaac   }
2732cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2733cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2734cc48ffa7SToby Isaac   recvisfrom = rank;
2735cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2736cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2737cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2738cc48ffa7SToby Isaac     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2739cc48ffa7SToby Isaac 
2740cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2741cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2742cc48ffa7SToby Isaac       sendsiz = cK * bn;
2743cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2744cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2745cc48ffa7SToby Isaac       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2746cc48ffa7SToby Isaac       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2747cc48ffa7SToby Isaac     }
2748cc48ffa7SToby Isaac 
2749cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
2750cc48ffa7SToby Isaac     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
275150ce3c9cSStefano Zampini     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2752cc48ffa7SToby Isaac 
2753cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2754cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2755cc48ffa7SToby Isaac       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2756cc48ffa7SToby Isaac     }
2757cc48ffa7SToby Isaac     bn = nextbn;
2758cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2759cc48ffa7SToby Isaac     sendbuf = recvbuf;
2760cc48ffa7SToby Isaac   }
2761637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2762637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
27630acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
27640acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27650acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2767cc48ffa7SToby Isaac }
2768cc48ffa7SToby Isaac 
2769faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2770faa55883SToby Isaac {
2771faa55883SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
27726718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2773faa55883SToby Isaac   PetscErrorCode        ierr;
2774faa55883SToby Isaac   MPI_Comm              comm;
2775637a0070SStefano Zampini   PetscMPIInt           size;
2776637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2777637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2778637a0070SStefano Zampini   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2779faa55883SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2780637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, clda;
2781faa55883SToby Isaac 
2782faa55883SToby Isaac   PetscFunctionBegin;
27836718818eSStefano Zampini   MatCheckProduct(C,3);
27846718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
27856718818eSStefano Zampini   abt  = (Mat_MatTransMultDense*)C->product->data;
2786faa55883SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2787faa55883SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2788637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2789637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
27900acaeb71SStefano Zampini   ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2791637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2792637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2793637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2794637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2795637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2796faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2797faa55883SToby Isaac   bn      = B->rmap->n;
2798faa55883SToby Isaac   sendbuf = abt->buf[0];
2799faa55883SToby Isaac   recvbuf = abt->buf[1];
2800faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2801faa55883SToby Isaac     for (i = 0; i < cK; i++, k++) {
2802637a0070SStefano Zampini       sendbuf[k] = bv[i * blda + j];
2803faa55883SToby Isaac     }
2804faa55883SToby Isaac   }
2805637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2806faa55883SToby Isaac   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2807faa55883SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2808faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2809faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
281050ce3c9cSStefano Zampini   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2811637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2812637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
28130acaeb71SStefano Zampini   ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
28140acaeb71SStefano Zampini   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28150acaeb71SStefano Zampini   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816faa55883SToby Isaac   PetscFunctionReturn(0);
2817faa55883SToby Isaac }
2818faa55883SToby Isaac 
2819faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2820faa55883SToby Isaac {
28216718818eSStefano Zampini   Mat_MatTransMultDense *abt;
2822faa55883SToby Isaac   PetscErrorCode        ierr;
2823faa55883SToby Isaac 
2824faa55883SToby Isaac   PetscFunctionBegin;
28256718818eSStefano Zampini   MatCheckProduct(C,3);
28266718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
28276718818eSStefano Zampini   abt = (Mat_MatTransMultDense*)C->product->data;
2828faa55883SToby Isaac   switch (abt->alg) {
2829faa55883SToby Isaac   case 1:
2830faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2831faa55883SToby Isaac     break;
2832faa55883SToby Isaac   default:
2833faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2834faa55883SToby Isaac     break;
2835faa55883SToby Isaac   }
2836faa55883SToby Isaac   PetscFunctionReturn(0);
2837faa55883SToby Isaac }
2838faa55883SToby Isaac 
28396718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2840320f2790SHong Zhang {
2841320f2790SHong Zhang   PetscErrorCode   ierr;
28426718818eSStefano Zampini   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2843320f2790SHong Zhang 
2844320f2790SHong Zhang   PetscFunctionBegin;
2845320f2790SHong Zhang   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2846320f2790SHong Zhang   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2847320f2790SHong Zhang   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2848320f2790SHong Zhang   ierr = PetscFree(ab);CHKERRQ(ierr);
2849320f2790SHong Zhang   PetscFunctionReturn(0);
2850320f2790SHong Zhang }
2851320f2790SHong Zhang 
28525242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
2853320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2854320f2790SHong Zhang {
2855320f2790SHong Zhang   PetscErrorCode   ierr;
28566718818eSStefano Zampini   Mat_MatMultDense *ab;
2857320f2790SHong Zhang 
2858320f2790SHong Zhang   PetscFunctionBegin;
28596718818eSStefano Zampini   MatCheckProduct(C,3);
28606718818eSStefano Zampini   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
28616718818eSStefano Zampini   ab   = (Mat_MatMultDense*)C->product->data;
2862de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2863de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
28644222ddf1SHong Zhang   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2865de0a22f0SHong Zhang   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2866320f2790SHong Zhang   PetscFunctionReturn(0);
2867320f2790SHong Zhang }
2868320f2790SHong Zhang 
28696718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2870320f2790SHong Zhang {
2871320f2790SHong Zhang   PetscErrorCode   ierr;
2872320f2790SHong Zhang   Mat              Ae,Be,Ce;
2873320f2790SHong Zhang   Mat_MatMultDense *ab;
2874320f2790SHong Zhang 
2875320f2790SHong Zhang   PetscFunctionBegin;
28766718818eSStefano Zampini   MatCheckProduct(C,4);
28776718818eSStefano Zampini   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
28784222ddf1SHong Zhang   /* check local size of A and B */
2879320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2880378336b6SHong Zhang     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2881320f2790SHong Zhang   }
2882320f2790SHong Zhang 
28834222ddf1SHong Zhang   /* create elemental matrices Ae and Be */
28844222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
28854222ddf1SHong Zhang   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
28864222ddf1SHong Zhang   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
28874222ddf1SHong Zhang   ierr = MatSetUp(Ae);CHKERRQ(ierr);
28884222ddf1SHong Zhang   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2889320f2790SHong Zhang 
28904222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
28914222ddf1SHong Zhang   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
28924222ddf1SHong Zhang   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
28934222ddf1SHong Zhang   ierr = MatSetUp(Be);CHKERRQ(ierr);
28944222ddf1SHong Zhang   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2895320f2790SHong Zhang 
28964222ddf1SHong Zhang   /* compute symbolic Ce = Ae*Be */
28974222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
28984222ddf1SHong Zhang   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
28994222ddf1SHong Zhang 
29004222ddf1SHong Zhang   /* setup C */
29014222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
29024222ddf1SHong Zhang   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
29034222ddf1SHong Zhang   ierr = MatSetUp(C);CHKERRQ(ierr);
2904320f2790SHong Zhang 
2905320f2790SHong Zhang   /* create data structure for reuse Cdense */
2906320f2790SHong Zhang   ierr = PetscNew(&ab);CHKERRQ(ierr);
2907320f2790SHong Zhang   ab->Ae = Ae;
2908320f2790SHong Zhang   ab->Be = Be;
2909320f2790SHong Zhang   ab->Ce = Ce;
29106718818eSStefano Zampini 
29116718818eSStefano Zampini   C->product->data    = ab;
29126718818eSStefano Zampini   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
29134222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2914320f2790SHong Zhang   PetscFunctionReturn(0);
2915320f2790SHong Zhang }
29164222ddf1SHong Zhang #endif
29174222ddf1SHong Zhang /* ----------------------------------------------- */
29184222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
29194222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2920320f2790SHong Zhang {
2921320f2790SHong Zhang   PetscFunctionBegin;
29224222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
29234222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
2924320f2790SHong Zhang   PetscFunctionReturn(0);
2925320f2790SHong Zhang }
29265242a7b1SHong Zhang #endif
292786aefd0dSHong Zhang 
29284222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
29294222ddf1SHong Zhang {
29304222ddf1SHong Zhang   Mat_Product *product = C->product;
29314222ddf1SHong Zhang   Mat         A = product->A,B=product->B;
29324222ddf1SHong Zhang 
29334222ddf1SHong Zhang   PetscFunctionBegin;
29344222ddf1SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
29354222ddf1SHong Zhang     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
29366718818eSStefano Zampini   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
29376718818eSStefano Zampini   C->ops->productsymbolic = MatProductSymbolic_AtB;
29384222ddf1SHong Zhang   PetscFunctionReturn(0);
29394222ddf1SHong Zhang }
29404222ddf1SHong Zhang 
29414222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
29424222ddf1SHong Zhang {
29434222ddf1SHong Zhang   PetscErrorCode ierr;
29444222ddf1SHong Zhang   Mat_Product    *product = C->product;
29454222ddf1SHong Zhang   const char     *algTypes[2] = {"allgatherv","cyclic"};
29464222ddf1SHong Zhang   PetscInt       alg,nalg = 2;
29474222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
29484222ddf1SHong Zhang 
29494222ddf1SHong Zhang   PetscFunctionBegin;
29504222ddf1SHong Zhang   /* Set default algorithm */
29514222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
29524222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
29534222ddf1SHong Zhang   if (flg) {
29544222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29554222ddf1SHong Zhang   }
29564222ddf1SHong Zhang 
29574222ddf1SHong Zhang   /* Get runtime option */
29584222ddf1SHong Zhang   if (product->api_user) {
29594222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
29604222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29614222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
29624222ddf1SHong Zhang   } else {
29634222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
29644222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29654222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
29664222ddf1SHong Zhang   }
29674222ddf1SHong Zhang   if (flg) {
29684222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29694222ddf1SHong Zhang   }
29704222ddf1SHong Zhang 
29714222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
29724222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
29734222ddf1SHong Zhang   PetscFunctionReturn(0);
29744222ddf1SHong Zhang }
29754222ddf1SHong Zhang 
29764222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
29774222ddf1SHong Zhang {
29784222ddf1SHong Zhang   PetscErrorCode ierr;
29794222ddf1SHong Zhang   Mat_Product    *product = C->product;
29804222ddf1SHong Zhang 
29814222ddf1SHong Zhang   PetscFunctionBegin;
29824222ddf1SHong Zhang   switch (product->type) {
29834222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
29844222ddf1SHong Zhang   case MATPRODUCT_AB:
29854222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
29864222ddf1SHong Zhang     break;
29874222ddf1SHong Zhang #endif
29884222ddf1SHong Zhang   case MATPRODUCT_AtB:
29894222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
29904222ddf1SHong Zhang     break;
29914222ddf1SHong Zhang   case MATPRODUCT_ABt:
29924222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
29934222ddf1SHong Zhang     break;
29946718818eSStefano Zampini   default:
29956718818eSStefano Zampini     break;
29964222ddf1SHong Zhang   }
29974222ddf1SHong Zhang   PetscFunctionReturn(0);
29984222ddf1SHong Zhang }
2999