xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6947451f174971b5cbba4c8ab49dd392302c23da)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
8baa3c1c6SHong Zhang #include <petscblaslapack.h>
98965ea79SLois Curfman McInnes 
10ab92ecdeSBarry Smith /*@
11ab92ecdeSBarry Smith 
12ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
14ab92ecdeSBarry Smith 
15ab92ecdeSBarry Smith     Input Parameter:
16ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
17ab92ecdeSBarry Smith 
18ab92ecdeSBarry Smith     Output Parameter:
19ab92ecdeSBarry Smith .      B - the inner matrix
20ab92ecdeSBarry Smith 
218e6c10adSSatish Balay     Level: intermediate
228e6c10adSSatish Balay 
23ab92ecdeSBarry Smith @*/
24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25ab92ecdeSBarry Smith {
26ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27ab92ecdeSBarry Smith   PetscErrorCode ierr;
28ace3abfcSBarry Smith   PetscBool      flg;
29ab92ecdeSBarry Smith 
30ab92ecdeSBarry Smith   PetscFunctionBegin;
31251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
322205254eSKarl Rupp   if (flg) *B = mat->A;
332205254eSKarl Rupp   else *B = A;
34ab92ecdeSBarry Smith   PetscFunctionReturn(0);
35ab92ecdeSBarry Smith }
36ab92ecdeSBarry Smith 
37ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38ba8c8a56SBarry Smith {
39ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
40ba8c8a56SBarry Smith   PetscErrorCode ierr;
41d0f46423SBarry Smith   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
42ba8c8a56SBarry Smith 
43ba8c8a56SBarry Smith   PetscFunctionBegin;
44e7e72b3dSBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45ba8c8a56SBarry Smith   lrow = row - rstart;
46ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
47ba8c8a56SBarry Smith   PetscFunctionReturn(0);
48ba8c8a56SBarry Smith }
49ba8c8a56SBarry Smith 
50637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51ba8c8a56SBarry Smith {
52637a0070SStefano Zampini   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
53ba8c8a56SBarry Smith   PetscErrorCode ierr;
54637a0070SStefano Zampini   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
55ba8c8a56SBarry Smith 
56ba8c8a56SBarry Smith   PetscFunctionBegin;
57637a0070SStefano Zampini   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
58637a0070SStefano Zampini   lrow = row - rstart;
59637a0070SStefano Zampini   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
60ba8c8a56SBarry Smith   PetscFunctionReturn(0);
61ba8c8a56SBarry Smith }
62ba8c8a56SBarry Smith 
6311bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
640de54da6SSatish Balay {
650de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
666849ba73SBarry Smith   PetscErrorCode ierr;
67d0f46423SBarry Smith   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
6887828ca2SBarry Smith   PetscScalar    *array;
690de54da6SSatish Balay   MPI_Comm       comm;
70637a0070SStefano Zampini   PetscBool      cong;
7111bd1e4dSLisandro Dalcin   Mat            B;
720de54da6SSatish Balay 
730de54da6SSatish Balay   PetscFunctionBegin;
74637a0070SStefano Zampini   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
75637a0070SStefano Zampini   if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
7611bd1e4dSLisandro Dalcin   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
7711bd1e4dSLisandro Dalcin   if (!B) {
780de54da6SSatish Balay     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
7911bd1e4dSLisandro Dalcin     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
8011bd1e4dSLisandro Dalcin     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
8111bd1e4dSLisandro Dalcin     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
828c778c55SBarry Smith     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
8311bd1e4dSLisandro Dalcin     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
848c778c55SBarry Smith     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
8511bd1e4dSLisandro Dalcin     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8611bd1e4dSLisandro Dalcin     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8711bd1e4dSLisandro Dalcin     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
8811bd1e4dSLisandro Dalcin     *a   = B;
8911bd1e4dSLisandro Dalcin     ierr = MatDestroy(&B);CHKERRQ(ierr);
902205254eSKarl Rupp   } else *a = B;
910de54da6SSatish Balay   PetscFunctionReturn(0);
920de54da6SSatish Balay }
930de54da6SSatish Balay 
9413f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
958965ea79SLois Curfman McInnes {
9639b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
97dfbe8321SBarry Smith   PetscErrorCode ierr;
98d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
99ace3abfcSBarry Smith   PetscBool      roworiented = A->roworiented;
1008965ea79SLois Curfman McInnes 
1013a40ed3dSBarry Smith   PetscFunctionBegin;
1028965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1035ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
104e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1058965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1068965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
10739b7565bSBarry Smith       if (roworiented) {
10839b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1093a40ed3dSBarry Smith       } else {
1108965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1115ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
112e32f2f54SBarry Smith           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
11339b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
11439b7565bSBarry Smith         }
1158965ea79SLois Curfman McInnes       }
1162205254eSKarl Rupp     } else if (!A->donotstash) {
1175080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
11839b7565bSBarry Smith       if (roworiented) {
119b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
120d36fbae8SSatish Balay       } else {
121b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
12239b7565bSBarry Smith       }
123b49de8d1SLois Curfman McInnes     }
124b49de8d1SLois Curfman McInnes   }
1253a40ed3dSBarry Smith   PetscFunctionReturn(0);
126b49de8d1SLois Curfman McInnes }
127b49de8d1SLois Curfman McInnes 
12813f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
129b49de8d1SLois Curfman McInnes {
130b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
131dfbe8321SBarry Smith   PetscErrorCode ierr;
132d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
133b49de8d1SLois Curfman McInnes 
1343a40ed3dSBarry Smith   PetscFunctionBegin;
135b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
136e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
137e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
138b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
139b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
140b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
141e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
142e7e72b3dSBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
143b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
144b49de8d1SLois Curfman McInnes       }
145e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
1468965ea79SLois Curfman McInnes   }
1473a40ed3dSBarry Smith   PetscFunctionReturn(0);
1488965ea79SLois Curfman McInnes }
1498965ea79SLois Curfman McInnes 
15049a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
15149a6ff4bSBarry Smith {
15249a6ff4bSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
15349a6ff4bSBarry Smith   PetscErrorCode ierr;
15449a6ff4bSBarry Smith 
15549a6ff4bSBarry Smith   PetscFunctionBegin;
15649a6ff4bSBarry Smith   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
15749a6ff4bSBarry Smith   PetscFunctionReturn(0);
15849a6ff4bSBarry Smith }
15949a6ff4bSBarry Smith 
160637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
161ff14e315SSatish Balay {
162ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163dfbe8321SBarry Smith   PetscErrorCode ierr;
164ff14e315SSatish Balay 
1653a40ed3dSBarry Smith   PetscFunctionBegin;
1668c778c55SBarry Smith   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
1673a40ed3dSBarry Smith   PetscFunctionReturn(0);
168ff14e315SSatish Balay }
169ff14e315SSatish Balay 
170637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
1718572280aSBarry Smith {
1728572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1738572280aSBarry Smith   PetscErrorCode ierr;
1748572280aSBarry Smith 
1758572280aSBarry Smith   PetscFunctionBegin;
1768572280aSBarry Smith   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
1778572280aSBarry Smith   PetscFunctionReturn(0);
1788572280aSBarry Smith }
1798572280aSBarry Smith 
180*6947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
181*6947451fSStefano Zampini {
182*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
183*6947451fSStefano Zampini   PetscErrorCode ierr;
184*6947451fSStefano Zampini 
185*6947451fSStefano Zampini   PetscFunctionBegin;
186*6947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
187*6947451fSStefano Zampini   PetscFunctionReturn(0);
188*6947451fSStefano Zampini }
189*6947451fSStefano Zampini 
190637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
191d3042a70SBarry Smith {
192d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193d3042a70SBarry Smith   PetscErrorCode ierr;
194d3042a70SBarry Smith 
195d3042a70SBarry Smith   PetscFunctionBegin;
196*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
197d3042a70SBarry Smith   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
198d3042a70SBarry Smith   PetscFunctionReturn(0);
199d3042a70SBarry Smith }
200d3042a70SBarry Smith 
201d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
202d3042a70SBarry Smith {
203d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
204d3042a70SBarry Smith   PetscErrorCode ierr;
205d3042a70SBarry Smith 
206d3042a70SBarry Smith   PetscFunctionBegin;
207*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
208d3042a70SBarry Smith   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
209d3042a70SBarry Smith   PetscFunctionReturn(0);
210d3042a70SBarry Smith }
211d3042a70SBarry Smith 
2127dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
213ca3fa75bSLois Curfman McInnes {
214ca3fa75bSLois Curfman McInnes   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
2156849ba73SBarry Smith   PetscErrorCode    ierr;
216637a0070SStefano Zampini   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
2175d0c19d7SBarry Smith   const PetscInt    *irow,*icol;
218637a0070SStefano Zampini   const PetscScalar *v;
219637a0070SStefano Zampini   PetscScalar       *bv;
220ca3fa75bSLois Curfman McInnes   Mat               newmat;
2214aa3045dSJed Brown   IS                iscol_local;
22242a884f0SBarry Smith   MPI_Comm          comm_is,comm_mat;
223ca3fa75bSLois Curfman McInnes 
224ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
22542a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
22642a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
22742a884f0SBarry Smith   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
22842a884f0SBarry Smith 
2294aa3045dSJed Brown   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
230ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
2314aa3045dSJed Brown   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
232b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
233b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
2344aa3045dSJed Brown   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
235ca3fa75bSLois Curfman McInnes 
236ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2377eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2387eba5e9cSLois Curfman McInnes      original matrix! */
239ca3fa75bSLois Curfman McInnes 
240ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
2417eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
242ca3fa75bSLois Curfman McInnes 
243ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
244ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
245e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2467eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
247ca3fa75bSLois Curfman McInnes     newmat = *B;
248ca3fa75bSLois Curfman McInnes   } else {
249ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
250ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
2514aa3045dSJed Brown     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
2527adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2530298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
254ca3fa75bSLois Curfman McInnes   }
255ca3fa75bSLois Curfman McInnes 
256ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
257ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
258637a0070SStefano Zampini   ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
259637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
260637a0070SStefano Zampini   ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
2614aa3045dSJed Brown   for (i=0; i<Ncols; i++) {
262637a0070SStefano Zampini     const PetscScalar *av = v + lda*icol[i];
263ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2647eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
265ca3fa75bSLois Curfman McInnes     }
266ca3fa75bSLois Curfman McInnes   }
267637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
268637a0070SStefano Zampini   ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);
269ca3fa75bSLois Curfman McInnes 
270ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
271ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
273ca3fa75bSLois Curfman McInnes 
274ca3fa75bSLois Curfman McInnes   /* Free work space */
275ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
2765bdf786aSShri Abhyankar   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
27732bb1f2dSLisandro Dalcin   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
278ca3fa75bSLois Curfman McInnes   *B   = newmat;
279ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
280ca3fa75bSLois Curfman McInnes }
281ca3fa75bSLois Curfman McInnes 
282637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
283ff14e315SSatish Balay {
28473a71a0fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
28573a71a0fSBarry Smith   PetscErrorCode ierr;
28673a71a0fSBarry Smith 
2873a40ed3dSBarry Smith   PetscFunctionBegin;
2888c778c55SBarry Smith   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
2893a40ed3dSBarry Smith   PetscFunctionReturn(0);
290ff14e315SSatish Balay }
291ff14e315SSatish Balay 
292637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
2938572280aSBarry Smith {
2948572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
2958572280aSBarry Smith   PetscErrorCode ierr;
2968572280aSBarry Smith 
2978572280aSBarry Smith   PetscFunctionBegin;
2988572280aSBarry Smith   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
2998572280aSBarry Smith   PetscFunctionReturn(0);
3008572280aSBarry Smith }
3018572280aSBarry Smith 
302*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
303*6947451fSStefano Zampini {
304*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
305*6947451fSStefano Zampini   PetscErrorCode ierr;
306*6947451fSStefano Zampini 
307*6947451fSStefano Zampini   PetscFunctionBegin;
308*6947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
309*6947451fSStefano Zampini   PetscFunctionReturn(0);
310*6947451fSStefano Zampini }
311*6947451fSStefano Zampini 
312dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
3138965ea79SLois Curfman McInnes {
31439ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
315dfbe8321SBarry Smith   PetscErrorCode ierr;
31613f74950SBarry Smith   PetscInt       nstash,reallocs;
3178965ea79SLois Curfman McInnes 
3183a40ed3dSBarry Smith   PetscFunctionBegin;
319910cf402Sprj-   if (mdn->donotstash || mat->nooffprocentries) return(0);
3208965ea79SLois Curfman McInnes 
321d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
3228798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
323ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
3243a40ed3dSBarry Smith   PetscFunctionReturn(0);
3258965ea79SLois Curfman McInnes }
3268965ea79SLois Curfman McInnes 
327dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
3288965ea79SLois Curfman McInnes {
32939ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
3306849ba73SBarry Smith   PetscErrorCode ierr;
33113f74950SBarry Smith   PetscInt       i,*row,*col,flg,j,rstart,ncols;
33213f74950SBarry Smith   PetscMPIInt    n;
33387828ca2SBarry Smith   PetscScalar    *val;
3348965ea79SLois Curfman McInnes 
3353a40ed3dSBarry Smith   PetscFunctionBegin;
336910cf402Sprj-   if (!mdn->donotstash && !mat->nooffprocentries) {
3378965ea79SLois Curfman McInnes     /*  wait on receives */
3387ef1d9bdSSatish Balay     while (1) {
3398798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
3407ef1d9bdSSatish Balay       if (!flg) break;
3418965ea79SLois Curfman McInnes 
3427ef1d9bdSSatish Balay       for (i=0; i<n;) {
3437ef1d9bdSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
3442205254eSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
3452205254eSKarl Rupp           if (row[j] != rstart) break;
3462205254eSKarl Rupp         }
3477ef1d9bdSSatish Balay         if (j < n) ncols = j-i;
3487ef1d9bdSSatish Balay         else       ncols = n-i;
3497ef1d9bdSSatish Balay         /* Now assemble all these values with a single function call */
3504b4eb8d3SJed Brown         ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
3517ef1d9bdSSatish Balay         i    = j;
3528965ea79SLois Curfman McInnes       }
3537ef1d9bdSSatish Balay     }
3548798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
355910cf402Sprj-   }
3568965ea79SLois Curfman McInnes 
35739ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
35839ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
3598965ea79SLois Curfman McInnes 
3606d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
36139ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
3628965ea79SLois Curfman McInnes   }
3633a40ed3dSBarry Smith   PetscFunctionReturn(0);
3648965ea79SLois Curfman McInnes }
3658965ea79SLois Curfman McInnes 
366dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
3678965ea79SLois Curfman McInnes {
368dfbe8321SBarry Smith   PetscErrorCode ierr;
36939ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
3703a40ed3dSBarry Smith 
3713a40ed3dSBarry Smith   PetscFunctionBegin;
3723a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
3733a40ed3dSBarry Smith   PetscFunctionReturn(0);
3748965ea79SLois Curfman McInnes }
3758965ea79SLois Curfman McInnes 
376637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
3778965ea79SLois Curfman McInnes {
37839ddd567SLois Curfman McInnes   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
3796849ba73SBarry Smith   PetscErrorCode    ierr;
380637a0070SStefano Zampini   PetscInt          i,len,*lrows;
381637a0070SStefano Zampini 
382637a0070SStefano Zampini   PetscFunctionBegin;
383637a0070SStefano Zampini   /* get locally owned rows */
384637a0070SStefano Zampini   ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
385637a0070SStefano Zampini   /* fix right hand side if needed */
386637a0070SStefano Zampini   if (x && b) {
38797b48c8fSBarry Smith     const PetscScalar *xx;
38897b48c8fSBarry Smith     PetscScalar       *bb;
3898965ea79SLois Curfman McInnes 
39097b48c8fSBarry Smith     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
391637a0070SStefano Zampini     ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
392637a0070SStefano Zampini     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
39397b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
394637a0070SStefano Zampini     ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
39597b48c8fSBarry Smith   }
396637a0070SStefano Zampini   ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
397e2eb51b1SBarry Smith   if (diag != 0.0) {
398637a0070SStefano Zampini     Vec d;
399b9679d65SBarry Smith 
400637a0070SStefano Zampini     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
401637a0070SStefano Zampini     ierr = VecSet(d,diag);CHKERRQ(ierr);
402637a0070SStefano Zampini     ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
403637a0070SStefano Zampini     ierr = VecDestroy(&d);CHKERRQ(ierr);
404b9679d65SBarry Smith   }
405606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4063a40ed3dSBarry Smith   PetscFunctionReturn(0);
4078965ea79SLois Curfman McInnes }
4088965ea79SLois Curfman McInnes 
409cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
410cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
411cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
412cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
413cc2e6a90SBarry Smith 
414dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4158965ea79SLois Curfman McInnes {
41639ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
417dfbe8321SBarry Smith   PetscErrorCode    ierr;
418637a0070SStefano Zampini   const PetscScalar *ax;
419637a0070SStefano Zampini   PetscScalar       *ay;
420c456f294SBarry Smith 
4213a40ed3dSBarry Smith   PetscFunctionBegin;
422637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
423637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
424637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
425637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
426637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
427637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
428637a0070SStefano Zampini   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4293a40ed3dSBarry Smith   PetscFunctionReturn(0);
4308965ea79SLois Curfman McInnes }
4318965ea79SLois Curfman McInnes 
432dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4338965ea79SLois Curfman McInnes {
43439ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
435dfbe8321SBarry Smith   PetscErrorCode    ierr;
436637a0070SStefano Zampini   const PetscScalar *ax;
437637a0070SStefano Zampini   PetscScalar       *ay;
438c456f294SBarry Smith 
4393a40ed3dSBarry Smith   PetscFunctionBegin;
440637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
441637a0070SStefano Zampini   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
442637a0070SStefano Zampini   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
443637a0070SStefano Zampini   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
444637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
445637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
446637a0070SStefano Zampini   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
4473a40ed3dSBarry Smith   PetscFunctionReturn(0);
4488965ea79SLois Curfman McInnes }
4498965ea79SLois Curfman McInnes 
450dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
451096963f5SLois Curfman McInnes {
452096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
453dfbe8321SBarry Smith   PetscErrorCode    ierr;
454637a0070SStefano Zampini   const PetscScalar *ax;
455637a0070SStefano Zampini   PetscScalar       *ay;
456096963f5SLois Curfman McInnes 
4573a40ed3dSBarry Smith   PetscFunctionBegin;
458637a0070SStefano Zampini   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
459637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
460637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
461637a0070SStefano Zampini   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
462637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
463637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
464637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
465637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
4663a40ed3dSBarry Smith   PetscFunctionReturn(0);
467096963f5SLois Curfman McInnes }
468096963f5SLois Curfman McInnes 
469dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
470096963f5SLois Curfman McInnes {
471096963f5SLois Curfman McInnes   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
472dfbe8321SBarry Smith   PetscErrorCode    ierr;
473637a0070SStefano Zampini   const PetscScalar *ax;
474637a0070SStefano Zampini   PetscScalar       *ay;
475096963f5SLois Curfman McInnes 
4763a40ed3dSBarry Smith   PetscFunctionBegin;
4773501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
478637a0070SStefano Zampini   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
479637a0070SStefano Zampini   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
480637a0070SStefano Zampini   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
481637a0070SStefano Zampini   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
482637a0070SStefano Zampini   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
483637a0070SStefano Zampini   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
484637a0070SStefano Zampini   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
4853a40ed3dSBarry Smith   PetscFunctionReturn(0);
486096963f5SLois Curfman McInnes }
487096963f5SLois Curfman McInnes 
488dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
4898965ea79SLois Curfman McInnes {
49039ddd567SLois Curfman McInnes   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
491dfbe8321SBarry Smith   PetscErrorCode    ierr;
492637a0070SStefano Zampini   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
49387828ca2SBarry Smith   PetscScalar       *x,zero = 0.0;
494637a0070SStefano Zampini   const PetscScalar *av;
495ed3cc1f0SBarry Smith 
4963a40ed3dSBarry Smith   PetscFunctionBegin;
4972dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
4981ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
499096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
500e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
501d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
502d0f46423SBarry Smith   radd = A->rmap->rstart*m;
503637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
504637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
50544cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
506637a0070SStefano Zampini     x[i] = av[radd + i*lda + i];
507096963f5SLois Curfman McInnes   }
508637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
5091ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5103a40ed3dSBarry Smith   PetscFunctionReturn(0);
5118965ea79SLois Curfman McInnes }
5128965ea79SLois Curfman McInnes 
513dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5148965ea79SLois Curfman McInnes {
5153501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
516dfbe8321SBarry Smith   PetscErrorCode ierr;
517ed3cc1f0SBarry Smith 
5183a40ed3dSBarry Smith   PetscFunctionBegin;
519aa482453SBarry Smith #if defined(PETSC_USE_LOG)
520d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
5218965ea79SLois Curfman McInnes #endif
5228798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
5236bf464f9SBarry Smith   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
5246bf464f9SBarry Smith   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
525637a0070SStefano Zampini   ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
526*6947451fSStefano Zampini   ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
52701b82886SBarry Smith 
528bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
529dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
5308baccfbdSHong Zhang 
53149a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
5328baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
5338572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
5348572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
5358572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
536*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
537*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
538d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
539d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
5408baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5418baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
5428baccfbdSHong Zhang #endif
543bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
5444222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5454222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
546bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
547bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
54852c5f739Sprj-   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
54952c5f739Sprj-   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
5508baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5518baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
55286aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
55386aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
554*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
555*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
556*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
557*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
558*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
559*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
5603a40ed3dSBarry Smith   PetscFunctionReturn(0);
5618965ea79SLois Curfman McInnes }
56239ddd567SLois Curfman McInnes 
56352c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
56452c5f739Sprj- 
5659804daf3SBarry Smith #include <petscdraw.h>
5666849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
5678965ea79SLois Curfman McInnes {
56839ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
569dfbe8321SBarry Smith   PetscErrorCode    ierr;
5707da1fb6eSBarry Smith   PetscMPIInt       rank = mdn->rank;
57119fd82e9SBarry Smith   PetscViewerType   vtype;
572ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
573b0a32e0cSBarry Smith   PetscViewer       sviewer;
574f3ef73ceSBarry Smith   PetscViewerFormat format;
5758965ea79SLois Curfman McInnes 
5763a40ed3dSBarry Smith   PetscFunctionBegin;
577251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
578251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
57932077d6dSBarry Smith   if (iascii) {
580b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
581b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
582456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
5834e220ebcSLois Curfman McInnes       MatInfo info;
584888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
5851575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
5867b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
587b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
5881575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
589637a0070SStefano Zampini       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
5903a40ed3dSBarry Smith       PetscFunctionReturn(0);
591fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
5923a40ed3dSBarry Smith       PetscFunctionReturn(0);
5938965ea79SLois Curfman McInnes     }
594f1af5d2fSBarry Smith   } else if (isdraw) {
595b0a32e0cSBarry Smith     PetscDraw draw;
596ace3abfcSBarry Smith     PetscBool isnull;
597f1af5d2fSBarry Smith 
598b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
599b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
600f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
601f1af5d2fSBarry Smith   }
60277ed5343SBarry Smith 
6037da1fb6eSBarry Smith   {
6048965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6058965ea79SLois Curfman McInnes     Mat         A;
606d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
607ba8c8a56SBarry Smith     PetscInt    *cols;
608ba8c8a56SBarry Smith     PetscScalar *vals;
6098965ea79SLois Curfman McInnes 
610ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
6118965ea79SLois Curfman McInnes     if (!rank) {
612f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
6133a40ed3dSBarry Smith     } else {
614f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
6158965ea79SLois Curfman McInnes     }
6167adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
617878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
6180298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
6193bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
6208965ea79SLois Curfman McInnes 
62139ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
62239ddd567SLois Curfman McInnes        but it's quick for now */
62351022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6242205254eSKarl Rupp 
6252205254eSKarl Rupp     row = mat->rmap->rstart;
6262205254eSKarl Rupp     m   = mdn->A->rmap->n;
6278965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
628ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
629ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
630ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
63139ddd567SLois Curfman McInnes       row++;
6328965ea79SLois Curfman McInnes     }
6338965ea79SLois Curfman McInnes 
6346d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6356d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6363f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
637b9b97703SBarry Smith     if (!rank) {
6381a9d3c3cSBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
6397da1fb6eSBarry Smith       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
6408965ea79SLois Curfman McInnes     }
6413f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
642b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6436bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
6448965ea79SLois Curfman McInnes   }
6453a40ed3dSBarry Smith   PetscFunctionReturn(0);
6468965ea79SLois Curfman McInnes }
6478965ea79SLois Curfman McInnes 
648dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
6498965ea79SLois Curfman McInnes {
650dfbe8321SBarry Smith   PetscErrorCode ierr;
651ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw,issocket;
6528965ea79SLois Curfman McInnes 
653433994e6SBarry Smith   PetscFunctionBegin;
654251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
655251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
656251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
657251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
6580f5bd95cSBarry Smith 
65932077d6dSBarry Smith   if (iascii || issocket || isdraw) {
660f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
6610f5bd95cSBarry Smith   } else if (isbinary) {
6628491ab44SLisandro Dalcin     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
66311aeaf0aSBarry Smith   }
6643a40ed3dSBarry Smith   PetscFunctionReturn(0);
6658965ea79SLois Curfman McInnes }
6668965ea79SLois Curfman McInnes 
667dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
6688965ea79SLois Curfman McInnes {
6693501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
6703501a2bdSLois Curfman McInnes   Mat            mdn  = mat->A;
671dfbe8321SBarry Smith   PetscErrorCode ierr;
6723966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
6738965ea79SLois Curfman McInnes 
6743a40ed3dSBarry Smith   PetscFunctionBegin;
6754e220ebcSLois Curfman McInnes   info->block_size = 1.0;
6762205254eSKarl Rupp 
6774e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
6782205254eSKarl Rupp 
6794e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
6804e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
6818965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
6824e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
6834e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
6844e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
6854e220ebcSLois Curfman McInnes     info->memory       = isend[3];
6864e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
6878965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
6883966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
6892205254eSKarl Rupp 
6904e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6914e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
6924e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
6934e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
6944e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
6958965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
6963966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
6972205254eSKarl Rupp 
6984e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
6994e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7004e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7014e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7024e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7038965ea79SLois Curfman McInnes   }
7044e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7054e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7064e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7073a40ed3dSBarry Smith   PetscFunctionReturn(0);
7088965ea79SLois Curfman McInnes }
7098965ea79SLois Curfman McInnes 
710ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
7118965ea79SLois Curfman McInnes {
71239ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
713dfbe8321SBarry Smith   PetscErrorCode ierr;
7148965ea79SLois Curfman McInnes 
7153a40ed3dSBarry Smith   PetscFunctionBegin;
71612c028f9SKris Buschelman   switch (op) {
717512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
71812c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
71912c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
72043674050SBarry Smith     MatCheckPreallocated(A,1);
7214e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
72212c028f9SKris Buschelman     break;
72312c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
72443674050SBarry Smith     MatCheckPreallocated(A,1);
7254e0d8c25SBarry Smith     a->roworiented = flg;
7264e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
72712c028f9SKris Buschelman     break;
7284e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
72913fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
73012c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
731071fcb05SBarry Smith   case MAT_SORTED_FULL:
732290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
73312c028f9SKris Buschelman     break;
73412c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
7354e0d8c25SBarry Smith     a->donotstash = flg;
73612c028f9SKris Buschelman     break;
73777e54ba9SKris Buschelman   case MAT_SYMMETRIC:
73877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7399a4540c5SBarry Smith   case MAT_HERMITIAN:
7409a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
741600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
7425d7aebe8SStefano Zampini   case MAT_IGNORE_ZERO_ENTRIES:
743290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
74477e54ba9SKris Buschelman     break;
74512c028f9SKris Buschelman   default:
746e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
7473a40ed3dSBarry Smith   }
7483a40ed3dSBarry Smith   PetscFunctionReturn(0);
7498965ea79SLois Curfman McInnes }
7508965ea79SLois Curfman McInnes 
751dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
7525b2fa520SLois Curfman McInnes {
7535b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
754637a0070SStefano Zampini   const PetscScalar *l;
755637a0070SStefano Zampini   PetscScalar       x,*v,*vv,*r;
756dfbe8321SBarry Smith   PetscErrorCode    ierr;
757637a0070SStefano Zampini   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
7585b2fa520SLois Curfman McInnes 
7595b2fa520SLois Curfman McInnes   PetscFunctionBegin;
760637a0070SStefano Zampini   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
761637a0070SStefano Zampini   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
76272d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
7635b2fa520SLois Curfman McInnes   if (ll) {
76472d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
765637a0070SStefano Zampini     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
766bca11509SBarry Smith     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
7675b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
7685b2fa520SLois Curfman McInnes       x = l[i];
769637a0070SStefano Zampini       v = vv + i;
770637a0070SStefano Zampini       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
7715b2fa520SLois Curfman McInnes     }
772bca11509SBarry Smith     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
773637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
7745b2fa520SLois Curfman McInnes   }
7755b2fa520SLois Curfman McInnes   if (rr) {
776637a0070SStefano Zampini     const PetscScalar *ar;
777637a0070SStefano Zampini 
778175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
779e32f2f54SBarry Smith     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
780637a0070SStefano Zampini     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
781637a0070SStefano Zampini     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
782637a0070SStefano Zampini     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
783637a0070SStefano Zampini     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
784637a0070SStefano Zampini     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
7855b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
7865b2fa520SLois Curfman McInnes       x = r[i];
787637a0070SStefano Zampini       v = vv + i*lda;
7882205254eSKarl Rupp       for (j=0; j<m; j++) (*v++) *= x;
7895b2fa520SLois Curfman McInnes     }
790637a0070SStefano Zampini     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
791637a0070SStefano Zampini     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
7925b2fa520SLois Curfman McInnes   }
793637a0070SStefano Zampini   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
7945b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
7955b2fa520SLois Curfman McInnes }
7965b2fa520SLois Curfman McInnes 
797dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
798096963f5SLois Curfman McInnes {
7993501a2bdSLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
800dfbe8321SBarry Smith   PetscErrorCode    ierr;
80113f74950SBarry Smith   PetscInt          i,j;
802329f5518SBarry Smith   PetscReal         sum = 0.0;
803637a0070SStefano Zampini   const PetscScalar *av,*v;
8043501a2bdSLois Curfman McInnes 
8053a40ed3dSBarry Smith   PetscFunctionBegin;
806637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
807637a0070SStefano Zampini   v    = av;
8083501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
809064f8208SBarry Smith     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
8103501a2bdSLois Curfman McInnes   } else {
8113501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
812d0f46423SBarry Smith       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
813329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
8143501a2bdSLois Curfman McInnes       }
815b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
8168f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
817dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
8183a40ed3dSBarry Smith     } else if (type == NORM_1) {
819329f5518SBarry Smith       PetscReal *tmp,*tmp2;
820580bdb30SBarry Smith       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
821064f8208SBarry Smith       *nrm = 0.0;
822637a0070SStefano Zampini       v    = av;
823d0f46423SBarry Smith       for (j=0; j<mdn->A->cmap->n; j++) {
824d0f46423SBarry Smith         for (i=0; i<mdn->A->rmap->n; i++) {
82567e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
8263501a2bdSLois Curfman McInnes         }
8273501a2bdSLois Curfman McInnes       }
828b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
829d0f46423SBarry Smith       for (j=0; j<A->cmap->N; j++) {
830064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8313501a2bdSLois Curfman McInnes       }
8328627564fSBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
833d0f46423SBarry Smith       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
8343a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
835329f5518SBarry Smith       PetscReal ntemp;
8363501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
837b2566f29SBarry Smith       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
838ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
8393501a2bdSLois Curfman McInnes   }
840637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
8413a40ed3dSBarry Smith   PetscFunctionReturn(0);
8423501a2bdSLois Curfman McInnes }
8433501a2bdSLois Curfman McInnes 
844fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
8453501a2bdSLois Curfman McInnes {
8463501a2bdSLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
8473501a2bdSLois Curfman McInnes   Mat            B;
848d0f46423SBarry Smith   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
8496849ba73SBarry Smith   PetscErrorCode ierr;
850637a0070SStefano Zampini   PetscInt       j,i,lda;
85187828ca2SBarry Smith   PetscScalar    *v;
8523501a2bdSLois Curfman McInnes 
8533a40ed3dSBarry Smith   PetscFunctionBegin;
854cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
855ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
856d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
8577adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
8580298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
859637a0070SStefano Zampini   } else B = *matout;
8603501a2bdSLois Curfman McInnes 
861637a0070SStefano Zampini   m    = a->A->rmap->n; n = a->A->cmap->n;
862637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
863637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
864785e854fSJed Brown   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
8653501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
8661acff37aSSatish Balay   for (j=0; j<n; j++) {
8673501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
868637a0070SStefano Zampini     v   += lda;
8693501a2bdSLois Curfman McInnes   }
870637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
871606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
8726d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8736d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
874cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
8753501a2bdSLois Curfman McInnes     *matout = B;
8763501a2bdSLois Curfman McInnes   } else {
87728be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
8783501a2bdSLois Curfman McInnes   }
8793a40ed3dSBarry Smith   PetscFunctionReturn(0);
880096963f5SLois Curfman McInnes }
881096963f5SLois Curfman McInnes 
8826849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
88352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
8848965ea79SLois Curfman McInnes 
8854994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A)
886273d9f13SBarry Smith {
887dfbe8321SBarry Smith   PetscErrorCode ierr;
888273d9f13SBarry Smith 
889273d9f13SBarry Smith   PetscFunctionBegin;
89018992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
89118992e5dSStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
89218992e5dSStefano Zampini   if (!A->preallocated) {
893273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
89418992e5dSStefano Zampini   }
895273d9f13SBarry Smith   PetscFunctionReturn(0);
896273d9f13SBarry Smith }
897273d9f13SBarry Smith 
898488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
899488007eeSBarry Smith {
900488007eeSBarry Smith   PetscErrorCode ierr;
901488007eeSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
902488007eeSBarry Smith 
903488007eeSBarry Smith   PetscFunctionBegin;
904488007eeSBarry Smith   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
905488007eeSBarry Smith   PetscFunctionReturn(0);
906488007eeSBarry Smith }
907488007eeSBarry Smith 
9087087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat)
909ba337c44SJed Brown {
910ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
911ba337c44SJed Brown   PetscErrorCode ierr;
912ba337c44SJed Brown 
913ba337c44SJed Brown   PetscFunctionBegin;
914ba337c44SJed Brown   ierr = MatConjugate(a->A);CHKERRQ(ierr);
915ba337c44SJed Brown   PetscFunctionReturn(0);
916ba337c44SJed Brown }
917ba337c44SJed Brown 
918ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A)
919ba337c44SJed Brown {
920ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
921ba337c44SJed Brown   PetscErrorCode ierr;
922ba337c44SJed Brown 
923ba337c44SJed Brown   PetscFunctionBegin;
924ba337c44SJed Brown   ierr = MatRealPart(a->A);CHKERRQ(ierr);
925ba337c44SJed Brown   PetscFunctionReturn(0);
926ba337c44SJed Brown }
927ba337c44SJed Brown 
928ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
929ba337c44SJed Brown {
930ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
931ba337c44SJed Brown   PetscErrorCode ierr;
932ba337c44SJed Brown 
933ba337c44SJed Brown   PetscFunctionBegin;
934ba337c44SJed Brown   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
935ba337c44SJed Brown   PetscFunctionReturn(0);
936ba337c44SJed Brown }
937ba337c44SJed Brown 
93849a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
93949a6ff4bSBarry Smith {
94049a6ff4bSBarry Smith   PetscErrorCode ierr;
941637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
94249a6ff4bSBarry Smith 
94349a6ff4bSBarry Smith   PetscFunctionBegin;
944637a0070SStefano Zampini   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
945637a0070SStefano Zampini   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
946637a0070SStefano Zampini   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
94749a6ff4bSBarry Smith   PetscFunctionReturn(0);
94849a6ff4bSBarry Smith }
94949a6ff4bSBarry Smith 
95052c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
95152c5f739Sprj- 
9520716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
9530716a85fSBarry Smith {
9540716a85fSBarry Smith   PetscErrorCode ierr;
9550716a85fSBarry Smith   PetscInt       i,n;
9560716a85fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
9570716a85fSBarry Smith   PetscReal      *work;
9580716a85fSBarry Smith 
9590716a85fSBarry Smith   PetscFunctionBegin;
9600298fd71SBarry Smith   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
961785e854fSJed Brown   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
9620716a85fSBarry Smith   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
9630716a85fSBarry Smith   if (type == NORM_2) {
9640716a85fSBarry Smith     for (i=0; i<n; i++) work[i] *= work[i];
9650716a85fSBarry Smith   }
9660716a85fSBarry Smith   if (type == NORM_INFINITY) {
967b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
9680716a85fSBarry Smith   } else {
969b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
9700716a85fSBarry Smith   }
9710716a85fSBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
9720716a85fSBarry Smith   if (type == NORM_2) {
9738f1a2a5eSBarry Smith     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
9740716a85fSBarry Smith   }
9750716a85fSBarry Smith   PetscFunctionReturn(0);
9760716a85fSBarry Smith }
9770716a85fSBarry Smith 
978637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
979*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
980*6947451fSStefano Zampini {
981*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
982*6947451fSStefano Zampini   PetscErrorCode ierr;
983*6947451fSStefano Zampini   PetscInt       lda;
984*6947451fSStefano Zampini 
985*6947451fSStefano Zampini   PetscFunctionBegin;
986*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
987*6947451fSStefano Zampini   if (!a->cvec) {
988*6947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
989*6947451fSStefano Zampini   }
990*6947451fSStefano Zampini   a->vecinuse = col + 1;
991*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
992*6947451fSStefano Zampini   ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
993*6947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
994*6947451fSStefano Zampini   *v   = a->cvec;
995*6947451fSStefano Zampini   PetscFunctionReturn(0);
996*6947451fSStefano Zampini }
997*6947451fSStefano Zampini 
998*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
999*6947451fSStefano Zampini {
1000*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1001*6947451fSStefano Zampini   PetscErrorCode ierr;
1002*6947451fSStefano Zampini 
1003*6947451fSStefano Zampini   PetscFunctionBegin;
1004*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1005*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1006*6947451fSStefano Zampini   a->vecinuse = 0;
1007*6947451fSStefano Zampini   ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1008*6947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1009*6947451fSStefano Zampini   *v   = NULL;
1010*6947451fSStefano Zampini   PetscFunctionReturn(0);
1011*6947451fSStefano Zampini }
1012*6947451fSStefano Zampini 
1013*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1014*6947451fSStefano Zampini {
1015*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1016*6947451fSStefano Zampini   PetscInt       lda;
1017*6947451fSStefano Zampini   PetscErrorCode ierr;
1018*6947451fSStefano Zampini 
1019*6947451fSStefano Zampini   PetscFunctionBegin;
1020*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1021*6947451fSStefano Zampini   if (!a->cvec) {
1022*6947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1023*6947451fSStefano Zampini   }
1024*6947451fSStefano Zampini   a->vecinuse = col + 1;
1025*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1026*6947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1027*6947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1028*6947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1029*6947451fSStefano Zampini   *v   = a->cvec;
1030*6947451fSStefano Zampini   PetscFunctionReturn(0);
1031*6947451fSStefano Zampini }
1032*6947451fSStefano Zampini 
1033*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1034*6947451fSStefano Zampini {
1035*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1036*6947451fSStefano Zampini   PetscErrorCode ierr;
1037*6947451fSStefano Zampini 
1038*6947451fSStefano Zampini   PetscFunctionBegin;
1039*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1040*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1041*6947451fSStefano Zampini   a->vecinuse = 0;
1042*6947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1043*6947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1044*6947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1045*6947451fSStefano Zampini   *v   = NULL;
1046*6947451fSStefano Zampini   PetscFunctionReturn(0);
1047*6947451fSStefano Zampini }
1048*6947451fSStefano Zampini 
1049*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1050*6947451fSStefano Zampini {
1051*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1052*6947451fSStefano Zampini   PetscErrorCode ierr;
1053*6947451fSStefano Zampini   PetscInt       lda;
1054*6947451fSStefano Zampini 
1055*6947451fSStefano Zampini   PetscFunctionBegin;
1056*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1057*6947451fSStefano Zampini   if (!a->cvec) {
1058*6947451fSStefano Zampini     ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1059*6947451fSStefano Zampini   }
1060*6947451fSStefano Zampini   a->vecinuse = col + 1;
1061*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1062*6947451fSStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1063*6947451fSStefano Zampini   ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1064*6947451fSStefano Zampini   *v   = a->cvec;
1065*6947451fSStefano Zampini   PetscFunctionReturn(0);
1066*6947451fSStefano Zampini }
1067*6947451fSStefano Zampini 
1068*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1069*6947451fSStefano Zampini {
1070*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1071*6947451fSStefano Zampini   PetscErrorCode ierr;
1072*6947451fSStefano Zampini 
1073*6947451fSStefano Zampini   PetscFunctionBegin;
1074*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1075*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1076*6947451fSStefano Zampini   a->vecinuse = 0;
1077*6947451fSStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1078*6947451fSStefano Zampini   ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
1079*6947451fSStefano Zampini   *v   = NULL;
1080*6947451fSStefano Zampini   PetscFunctionReturn(0);
1081*6947451fSStefano Zampini }
1082*6947451fSStefano Zampini 
1083637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1084637a0070SStefano Zampini {
1085637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1086637a0070SStefano Zampini   PetscErrorCode ierr;
1087637a0070SStefano Zampini 
1088637a0070SStefano Zampini   PetscFunctionBegin;
1089*6947451fSStefano Zampini   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1090637a0070SStefano Zampini   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1091637a0070SStefano Zampini   PetscFunctionReturn(0);
1092637a0070SStefano Zampini }
1093637a0070SStefano Zampini 
1094637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1095637a0070SStefano Zampini {
1096637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1097637a0070SStefano Zampini   PetscErrorCode ierr;
1098637a0070SStefano Zampini 
1099637a0070SStefano Zampini   PetscFunctionBegin;
1100*6947451fSStefano Zampini   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1101637a0070SStefano Zampini   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1102637a0070SStefano Zampini   PetscFunctionReturn(0);
1103637a0070SStefano Zampini }
1104637a0070SStefano Zampini 
1105637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1106637a0070SStefano Zampini {
1107637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1108637a0070SStefano Zampini   PetscErrorCode ierr;
1109637a0070SStefano Zampini 
1110637a0070SStefano Zampini   PetscFunctionBegin;
1111637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1112637a0070SStefano Zampini   PetscFunctionReturn(0);
1113637a0070SStefano Zampini }
1114637a0070SStefano Zampini 
1115637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1116637a0070SStefano Zampini {
1117637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1118637a0070SStefano Zampini   PetscErrorCode ierr;
1119637a0070SStefano Zampini 
1120637a0070SStefano Zampini   PetscFunctionBegin;
1121637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1122637a0070SStefano Zampini   PetscFunctionReturn(0);
1123637a0070SStefano Zampini }
1124637a0070SStefano Zampini 
1125637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1126637a0070SStefano Zampini {
1127637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1128637a0070SStefano Zampini   PetscErrorCode ierr;
1129637a0070SStefano Zampini 
1130637a0070SStefano Zampini   PetscFunctionBegin;
1131637a0070SStefano Zampini   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1132637a0070SStefano Zampini   PetscFunctionReturn(0);
1133637a0070SStefano Zampini }
1134637a0070SStefano Zampini 
1135637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1136637a0070SStefano Zampini {
1137637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1138637a0070SStefano Zampini   PetscErrorCode ierr;
1139637a0070SStefano Zampini 
1140637a0070SStefano Zampini   PetscFunctionBegin;
1141637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1142637a0070SStefano Zampini   PetscFunctionReturn(0);
1143637a0070SStefano Zampini }
1144637a0070SStefano Zampini 
1145637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1146637a0070SStefano Zampini {
1147637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1148637a0070SStefano Zampini   PetscErrorCode ierr;
1149637a0070SStefano Zampini 
1150637a0070SStefano Zampini   PetscFunctionBegin;
1151637a0070SStefano Zampini   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1152637a0070SStefano Zampini   PetscFunctionReturn(0);
1153637a0070SStefano Zampini }
1154637a0070SStefano Zampini 
1155637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1156637a0070SStefano Zampini {
1157637a0070SStefano Zampini   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158637a0070SStefano Zampini   PetscErrorCode ierr;
1159637a0070SStefano Zampini 
1160637a0070SStefano Zampini   PetscFunctionBegin;
1161637a0070SStefano Zampini   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1162637a0070SStefano Zampini   PetscFunctionReturn(0);
1163637a0070SStefano Zampini }
1164637a0070SStefano Zampini 
1165*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1166*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1167*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1168*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1169*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1170*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1171*6947451fSStefano Zampini 
1172637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1173637a0070SStefano Zampini {
1174637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1175637a0070SStefano Zampini   PetscErrorCode ierr;
1176637a0070SStefano Zampini 
1177637a0070SStefano Zampini   PetscFunctionBegin;
1178*6947451fSStefano Zampini   if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1179637a0070SStefano Zampini   if (d->A) {
1180637a0070SStefano Zampini     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1181637a0070SStefano Zampini   }
1182637a0070SStefano Zampini   mat->boundtocpu = bind;
1183*6947451fSStefano Zampini   if (!bind) {
1184*6947451fSStefano Zampini     PetscBool iscuda;
1185*6947451fSStefano Zampini 
1186*6947451fSStefano Zampini     ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
1187*6947451fSStefano Zampini     if (!iscuda) {
1188*6947451fSStefano Zampini       ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
1189*6947451fSStefano Zampini     }
1190*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1191*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
1192*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1193*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
1194*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1195*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
1196*6947451fSStefano Zampini   } else {
1197*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1198*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1199*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1200*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1201*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1202*6947451fSStefano Zampini     ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
1203*6947451fSStefano Zampini   }
1204637a0070SStefano Zampini   PetscFunctionReturn(0);
1205637a0070SStefano Zampini }
1206637a0070SStefano Zampini 
1207637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1208637a0070SStefano Zampini {
1209637a0070SStefano Zampini   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1210637a0070SStefano Zampini   PetscErrorCode ierr;
1211637a0070SStefano Zampini   PetscBool      iscuda;
1212637a0070SStefano Zampini 
1213637a0070SStefano Zampini   PetscFunctionBegin;
1214637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1215637a0070SStefano Zampini   if (!iscuda) PetscFunctionReturn(0);
1216637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1217637a0070SStefano Zampini   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1218637a0070SStefano Zampini   if (!d->A) {
1219637a0070SStefano Zampini     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1220637a0070SStefano Zampini     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1221637a0070SStefano Zampini     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1222637a0070SStefano Zampini   }
1223637a0070SStefano Zampini   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1224637a0070SStefano Zampini   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1225637a0070SStefano Zampini   A->preallocated = PETSC_TRUE;
1226637a0070SStefano Zampini   PetscFunctionReturn(0);
1227637a0070SStefano Zampini }
1228637a0070SStefano Zampini #endif
1229637a0070SStefano Zampini 
123073a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
123173a71a0fSBarry Smith {
123273a71a0fSBarry Smith   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
123373a71a0fSBarry Smith   PetscErrorCode ierr;
123473a71a0fSBarry Smith 
123573a71a0fSBarry Smith   PetscFunctionBegin;
1236637a0070SStefano Zampini   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
123773a71a0fSBarry Smith   PetscFunctionReturn(0);
123873a71a0fSBarry Smith }
123973a71a0fSBarry Smith 
124052c5f739Sprj- PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1241fd4e9aacSBarry Smith 
12423b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
12433b49f96aSBarry Smith {
12443b49f96aSBarry Smith   PetscFunctionBegin;
12453b49f96aSBarry Smith   *missing = PETSC_FALSE;
12463b49f96aSBarry Smith   PetscFunctionReturn(0);
12473b49f96aSBarry Smith }
12483b49f96aSBarry Smith 
12494222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1250cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1251cc48ffa7SToby Isaac 
12528965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
125309dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
125409dc0095SBarry Smith                                         MatGetRow_MPIDense,
125509dc0095SBarry Smith                                         MatRestoreRow_MPIDense,
125609dc0095SBarry Smith                                         MatMult_MPIDense,
125797304618SKris Buschelman                                 /*  4*/ MatMultAdd_MPIDense,
12587c922b88SBarry Smith                                         MatMultTranspose_MPIDense,
12597c922b88SBarry Smith                                         MatMultTransposeAdd_MPIDense,
12608965ea79SLois Curfman McInnes                                         0,
126109dc0095SBarry Smith                                         0,
126209dc0095SBarry Smith                                         0,
126397304618SKris Buschelman                                 /* 10*/ 0,
126409dc0095SBarry Smith                                         0,
126509dc0095SBarry Smith                                         0,
126609dc0095SBarry Smith                                         0,
126709dc0095SBarry Smith                                         MatTranspose_MPIDense,
126897304618SKris Buschelman                                 /* 15*/ MatGetInfo_MPIDense,
12696e4ee0c6SHong Zhang                                         MatEqual_MPIDense,
127009dc0095SBarry Smith                                         MatGetDiagonal_MPIDense,
12715b2fa520SLois Curfman McInnes                                         MatDiagonalScale_MPIDense,
127209dc0095SBarry Smith                                         MatNorm_MPIDense,
127397304618SKris Buschelman                                 /* 20*/ MatAssemblyBegin_MPIDense,
127409dc0095SBarry Smith                                         MatAssemblyEnd_MPIDense,
127509dc0095SBarry Smith                                         MatSetOption_MPIDense,
127609dc0095SBarry Smith                                         MatZeroEntries_MPIDense,
1277d519adbfSMatthew Knepley                                 /* 24*/ MatZeroRows_MPIDense,
1278919b68f7SBarry Smith                                         0,
127901b82886SBarry Smith                                         0,
128001b82886SBarry Smith                                         0,
128101b82886SBarry Smith                                         0,
12824994cf47SJed Brown                                 /* 29*/ MatSetUp_MPIDense,
1283273d9f13SBarry Smith                                         0,
128409dc0095SBarry Smith                                         0,
1285c56a70eeSBarry Smith                                         MatGetDiagonalBlock_MPIDense,
12868c778c55SBarry Smith                                         0,
1287d519adbfSMatthew Knepley                                 /* 34*/ MatDuplicate_MPIDense,
128809dc0095SBarry Smith                                         0,
128909dc0095SBarry Smith                                         0,
129009dc0095SBarry Smith                                         0,
129109dc0095SBarry Smith                                         0,
1292d519adbfSMatthew Knepley                                 /* 39*/ MatAXPY_MPIDense,
12937dae84e0SHong Zhang                                         MatCreateSubMatrices_MPIDense,
129409dc0095SBarry Smith                                         0,
129509dc0095SBarry Smith                                         MatGetValues_MPIDense,
129609dc0095SBarry Smith                                         0,
1297d519adbfSMatthew Knepley                                 /* 44*/ 0,
129809dc0095SBarry Smith                                         MatScale_MPIDense,
12997d68702bSBarry Smith                                         MatShift_Basic,
130009dc0095SBarry Smith                                         0,
130109dc0095SBarry Smith                                         0,
130273a71a0fSBarry Smith                                 /* 49*/ MatSetRandom_MPIDense,
130309dc0095SBarry Smith                                         0,
130409dc0095SBarry Smith                                         0,
130509dc0095SBarry Smith                                         0,
130609dc0095SBarry Smith                                         0,
1307d519adbfSMatthew Knepley                                 /* 54*/ 0,
130809dc0095SBarry Smith                                         0,
130909dc0095SBarry Smith                                         0,
131009dc0095SBarry Smith                                         0,
131109dc0095SBarry Smith                                         0,
13127dae84e0SHong Zhang                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1313b9b97703SBarry Smith                                         MatDestroy_MPIDense,
1314b9b97703SBarry Smith                                         MatView_MPIDense,
1315357abbc8SBarry Smith                                         0,
131697304618SKris Buschelman                                         0,
1317d519adbfSMatthew Knepley                                 /* 64*/ 0,
131897304618SKris Buschelman                                         0,
131997304618SKris Buschelman                                         0,
132097304618SKris Buschelman                                         0,
132197304618SKris Buschelman                                         0,
1322d519adbfSMatthew Knepley                                 /* 69*/ 0,
132397304618SKris Buschelman                                         0,
132497304618SKris Buschelman                                         0,
132597304618SKris Buschelman                                         0,
132697304618SKris Buschelman                                         0,
1327d519adbfSMatthew Knepley                                 /* 74*/ 0,
132897304618SKris Buschelman                                         0,
132997304618SKris Buschelman                                         0,
133097304618SKris Buschelman                                         0,
133197304618SKris Buschelman                                         0,
1332d519adbfSMatthew Knepley                                 /* 79*/ 0,
133397304618SKris Buschelman                                         0,
133497304618SKris Buschelman                                         0,
133597304618SKris Buschelman                                         0,
13365bba2384SShri Abhyankar                                 /* 83*/ MatLoad_MPIDense,
1337865e5f61SKris Buschelman                                         0,
1338865e5f61SKris Buschelman                                         0,
1339865e5f61SKris Buschelman                                         0,
1340865e5f61SKris Buschelman                                         0,
1341865e5f61SKris Buschelman                                         0,
13424222ddf1SHong Zhang                                 /* 89*/ 0,
13434222ddf1SHong Zhang                                         0,
1344fd4e9aacSBarry Smith                                         MatMatMultNumeric_MPIDense,
13452fbe02b9SBarry Smith                                         0,
1346ba337c44SJed Brown                                         0,
1347d519adbfSMatthew Knepley                                 /* 94*/ 0,
13484222ddf1SHong Zhang                                         0,
13494222ddf1SHong Zhang                                         0,
1350cc48ffa7SToby Isaac                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1351ba337c44SJed Brown                                         0,
13524222ddf1SHong Zhang                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1353ba337c44SJed Brown                                         0,
1354ba337c44SJed Brown                                         0,
1355ba337c44SJed Brown                                         MatConjugate_MPIDense,
1356ba337c44SJed Brown                                         0,
1357ba337c44SJed Brown                                 /*104*/ 0,
1358ba337c44SJed Brown                                         MatRealPart_MPIDense,
1359ba337c44SJed Brown                                         MatImaginaryPart_MPIDense,
136086d161a7SShri Abhyankar                                         0,
136186d161a7SShri Abhyankar                                         0,
136286d161a7SShri Abhyankar                                 /*109*/ 0,
136386d161a7SShri Abhyankar                                         0,
136486d161a7SShri Abhyankar                                         0,
136549a6ff4bSBarry Smith                                         MatGetColumnVector_MPIDense,
13663b49f96aSBarry Smith                                         MatMissingDiagonal_MPIDense,
136786d161a7SShri Abhyankar                                 /*114*/ 0,
136886d161a7SShri Abhyankar                                         0,
136986d161a7SShri Abhyankar                                         0,
137086d161a7SShri Abhyankar                                         0,
137186d161a7SShri Abhyankar                                         0,
137286d161a7SShri Abhyankar                                 /*119*/ 0,
137386d161a7SShri Abhyankar                                         0,
137486d161a7SShri Abhyankar                                         0,
13750716a85fSBarry Smith                                         0,
13760716a85fSBarry Smith                                         0,
13770716a85fSBarry Smith                                 /*124*/ 0,
13783964eb88SJed Brown                                         MatGetColumnNorms_MPIDense,
13793964eb88SJed Brown                                         0,
13803964eb88SJed Brown                                         0,
13813964eb88SJed Brown                                         0,
13823964eb88SJed Brown                                 /*129*/ 0,
13834222ddf1SHong Zhang                                         0,
13844222ddf1SHong Zhang                                         0,
1385cb20be35SHong Zhang                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
13863964eb88SJed Brown                                         0,
13873964eb88SJed Brown                                 /*134*/ 0,
13883964eb88SJed Brown                                         0,
13893964eb88SJed Brown                                         0,
13903964eb88SJed Brown                                         0,
13913964eb88SJed Brown                                         0,
13923964eb88SJed Brown                                 /*139*/ 0,
1393f9426fe0SMark Adams                                         0,
139494e2cb23SJakub Kruzik                                         0,
139594e2cb23SJakub Kruzik                                         0,
139694e2cb23SJakub Kruzik                                         0,
13974222ddf1SHong Zhang                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
13984222ddf1SHong Zhang                                 /*145*/ 0,
13994222ddf1SHong Zhang                                         0,
14004222ddf1SHong Zhang                                         0
1401ba337c44SJed Brown };
14028965ea79SLois Curfman McInnes 
14037087cfbeSBarry Smith PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1404a23d5eceSKris Buschelman {
1405637a0070SStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1406637a0070SStefano Zampini   PetscBool      iscuda;
1407dfbe8321SBarry Smith   PetscErrorCode ierr;
1408a23d5eceSKris Buschelman 
1409a23d5eceSKris Buschelman   PetscFunctionBegin;
141034ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
141134ef9618SShri Abhyankar   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1412637a0070SStefano Zampini   if (!a->A) {
1413f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
14143bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1415637a0070SStefano Zampini     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1416637a0070SStefano Zampini   }
1417637a0070SStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1418637a0070SStefano Zampini   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1419637a0070SStefano Zampini   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1420637a0070SStefano Zampini   mat->preallocated = PETSC_TRUE;
1421a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1422a23d5eceSKris Buschelman }
1423a23d5eceSKris Buschelman 
142465b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1425cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
14268baccfbdSHong Zhang {
14278ea901baSHong Zhang   Mat            mat_elemental;
14288ea901baSHong Zhang   PetscErrorCode ierr;
142932d7a744SHong Zhang   PetscScalar    *v;
143032d7a744SHong Zhang   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
14318ea901baSHong Zhang 
14328baccfbdSHong Zhang   PetscFunctionBegin;
1433378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1434378336b6SHong Zhang     mat_elemental = *newmat;
1435378336b6SHong Zhang     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1436378336b6SHong Zhang   } else {
1437378336b6SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1438378336b6SHong Zhang     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1439378336b6SHong Zhang     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1440378336b6SHong Zhang     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
144132d7a744SHong Zhang     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1442378336b6SHong Zhang   }
1443378336b6SHong Zhang 
144432d7a744SHong Zhang   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
144532d7a744SHong Zhang   for (i=0; i<N; i++) cols[i] = i;
144632d7a744SHong Zhang   for (i=0; i<m; i++) rows[i] = rstart + i;
14478ea901baSHong Zhang 
1448637a0070SStefano Zampini   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
144932d7a744SHong Zhang   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
145032d7a744SHong Zhang   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
14518ea901baSHong Zhang   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14528ea901baSHong Zhang   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
145332d7a744SHong Zhang   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
145432d7a744SHong Zhang   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
14558ea901baSHong Zhang 
1456511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
145728be2f97SBarry Smith     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
14588ea901baSHong Zhang   } else {
14598ea901baSHong Zhang     *newmat = mat_elemental;
14608ea901baSHong Zhang   }
14618baccfbdSHong Zhang   PetscFunctionReturn(0);
14628baccfbdSHong Zhang }
146365b80a83SHong Zhang #endif
14648baccfbdSHong Zhang 
1465af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
146686aefd0dSHong Zhang {
146786aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
146886aefd0dSHong Zhang   PetscErrorCode ierr;
146986aefd0dSHong Zhang 
147086aefd0dSHong Zhang   PetscFunctionBegin;
147186aefd0dSHong Zhang   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
147286aefd0dSHong Zhang   PetscFunctionReturn(0);
147386aefd0dSHong Zhang }
147486aefd0dSHong Zhang 
1475af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
147686aefd0dSHong Zhang {
147786aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
147886aefd0dSHong Zhang   PetscErrorCode ierr;
147986aefd0dSHong Zhang 
148086aefd0dSHong Zhang   PetscFunctionBegin;
148186aefd0dSHong Zhang   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
148286aefd0dSHong Zhang   PetscFunctionReturn(0);
148386aefd0dSHong Zhang }
148486aefd0dSHong Zhang 
148594e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
148694e2cb23SJakub Kruzik {
148794e2cb23SJakub Kruzik   PetscErrorCode ierr;
148894e2cb23SJakub Kruzik   Mat_MPIDense   *mat;
148994e2cb23SJakub Kruzik   PetscInt       m,nloc,N;
149094e2cb23SJakub Kruzik 
149194e2cb23SJakub Kruzik   PetscFunctionBegin;
149294e2cb23SJakub Kruzik   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
149394e2cb23SJakub Kruzik   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
149494e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
149594e2cb23SJakub Kruzik     PetscInt sum;
149694e2cb23SJakub Kruzik 
149794e2cb23SJakub Kruzik     if (n == PETSC_DECIDE) {
149894e2cb23SJakub Kruzik       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
149994e2cb23SJakub Kruzik     }
150094e2cb23SJakub Kruzik     /* Check sum(n) = N */
150194e2cb23SJakub Kruzik     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
150294e2cb23SJakub Kruzik     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
150394e2cb23SJakub Kruzik 
150494e2cb23SJakub Kruzik     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
150594e2cb23SJakub Kruzik   }
150694e2cb23SJakub Kruzik 
150794e2cb23SJakub Kruzik   /* numeric phase */
150894e2cb23SJakub Kruzik   mat = (Mat_MPIDense*)(*outmat)->data;
150994e2cb23SJakub Kruzik   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
151094e2cb23SJakub Kruzik   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151194e2cb23SJakub Kruzik   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151294e2cb23SJakub Kruzik   PetscFunctionReturn(0);
151394e2cb23SJakub Kruzik }
151494e2cb23SJakub Kruzik 
1515637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1516637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1517637a0070SStefano Zampini {
1518637a0070SStefano Zampini   Mat            B;
1519637a0070SStefano Zampini   Mat_MPIDense   *m;
1520637a0070SStefano Zampini   PetscErrorCode ierr;
1521637a0070SStefano Zampini 
1522637a0070SStefano Zampini   PetscFunctionBegin;
1523637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1524637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1525637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1526637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1527637a0070SStefano Zampini   }
1528637a0070SStefano Zampini 
1529637a0070SStefano Zampini   B    = *newmat;
1530637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1531637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1532637a0070SStefano Zampini   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1533637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1534637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1535637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
1536637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
1537637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1538637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1539637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1540637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1541637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1542637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1543637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1544637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1545637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1546637a0070SStefano Zampini   if (m->A) {
1547637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1548637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1549637a0070SStefano Zampini   }
1550637a0070SStefano Zampini   B->ops->bindtocpu = NULL;
1551637a0070SStefano Zampini   B->offloadmask    = PETSC_OFFLOAD_CPU;
1552637a0070SStefano Zampini   PetscFunctionReturn(0);
1553637a0070SStefano Zampini }
1554637a0070SStefano Zampini 
1555637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1556637a0070SStefano Zampini {
1557637a0070SStefano Zampini   Mat            B;
1558637a0070SStefano Zampini   Mat_MPIDense   *m;
1559637a0070SStefano Zampini   PetscErrorCode ierr;
1560637a0070SStefano Zampini 
1561637a0070SStefano Zampini   PetscFunctionBegin;
1562637a0070SStefano Zampini   if (reuse == MAT_INITIAL_MATRIX) {
1563637a0070SStefano Zampini     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1564637a0070SStefano Zampini   } else if (reuse == MAT_REUSE_MATRIX) {
1565637a0070SStefano Zampini     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1566637a0070SStefano Zampini   }
1567637a0070SStefano Zampini 
1568637a0070SStefano Zampini   B    = *newmat;
1569637a0070SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1570637a0070SStefano Zampini   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1571637a0070SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1572637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1573637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1574637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1575637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1576637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1577637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1578637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1579637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1580637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1581637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1582637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1583637a0070SStefano Zampini   m    = (Mat_MPIDense*)(B)->data;
1584637a0070SStefano Zampini   if (m->A) {
1585637a0070SStefano Zampini     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1586637a0070SStefano Zampini     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1587637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_BOTH;
1588637a0070SStefano Zampini   } else {
1589637a0070SStefano Zampini     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1590637a0070SStefano Zampini   }
1591637a0070SStefano Zampini   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1592637a0070SStefano Zampini 
1593637a0070SStefano Zampini   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1594637a0070SStefano Zampini   PetscFunctionReturn(0);
1595637a0070SStefano Zampini }
1596637a0070SStefano Zampini #endif
1597637a0070SStefano Zampini 
1598*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1599*6947451fSStefano Zampini {
1600*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1601*6947451fSStefano Zampini   PetscErrorCode ierr;
1602*6947451fSStefano Zampini   PetscInt       lda;
1603*6947451fSStefano Zampini 
1604*6947451fSStefano Zampini   PetscFunctionBegin;
1605*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1606*6947451fSStefano Zampini   if (!a->cvec) {
1607*6947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1608*6947451fSStefano Zampini   }
1609*6947451fSStefano Zampini   a->vecinuse = col + 1;
1610*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1611*6947451fSStefano Zampini   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1612*6947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1613*6947451fSStefano Zampini   *v   = a->cvec;
1614*6947451fSStefano Zampini   PetscFunctionReturn(0);
1615*6947451fSStefano Zampini }
1616*6947451fSStefano Zampini 
1617*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1618*6947451fSStefano Zampini {
1619*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1620*6947451fSStefano Zampini   PetscErrorCode ierr;
1621*6947451fSStefano Zampini 
1622*6947451fSStefano Zampini   PetscFunctionBegin;
1623*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1624*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1625*6947451fSStefano Zampini   a->vecinuse = 0;
1626*6947451fSStefano Zampini   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1627*6947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1628*6947451fSStefano Zampini   *v   = NULL;
1629*6947451fSStefano Zampini   PetscFunctionReturn(0);
1630*6947451fSStefano Zampini }
1631*6947451fSStefano Zampini 
1632*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1633*6947451fSStefano Zampini {
1634*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1635*6947451fSStefano Zampini   PetscErrorCode ierr;
1636*6947451fSStefano Zampini   PetscInt       lda;
1637*6947451fSStefano Zampini 
1638*6947451fSStefano Zampini   PetscFunctionBegin;
1639*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1640*6947451fSStefano Zampini   if (!a->cvec) {
1641*6947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1642*6947451fSStefano Zampini   }
1643*6947451fSStefano Zampini   a->vecinuse = col + 1;
1644*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1645*6947451fSStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1646*6947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1647*6947451fSStefano Zampini   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1648*6947451fSStefano Zampini   *v   = a->cvec;
1649*6947451fSStefano Zampini   PetscFunctionReturn(0);
1650*6947451fSStefano Zampini }
1651*6947451fSStefano Zampini 
1652*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1653*6947451fSStefano Zampini {
1654*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1655*6947451fSStefano Zampini   PetscErrorCode ierr;
1656*6947451fSStefano Zampini 
1657*6947451fSStefano Zampini   PetscFunctionBegin;
1658*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1659*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1660*6947451fSStefano Zampini   a->vecinuse = 0;
1661*6947451fSStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1662*6947451fSStefano Zampini   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1663*6947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1664*6947451fSStefano Zampini   *v   = NULL;
1665*6947451fSStefano Zampini   PetscFunctionReturn(0);
1666*6947451fSStefano Zampini }
1667*6947451fSStefano Zampini 
1668*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1669*6947451fSStefano Zampini {
1670*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1671*6947451fSStefano Zampini   PetscErrorCode ierr;
1672*6947451fSStefano Zampini   PetscInt       lda;
1673*6947451fSStefano Zampini 
1674*6947451fSStefano Zampini   PetscFunctionBegin;
1675*6947451fSStefano Zampini   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1676*6947451fSStefano Zampini   if (!a->cvec) {
1677*6947451fSStefano Zampini     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1678*6947451fSStefano Zampini   }
1679*6947451fSStefano Zampini   a->vecinuse = col + 1;
1680*6947451fSStefano Zampini   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1681*6947451fSStefano Zampini   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1682*6947451fSStefano Zampini   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1683*6947451fSStefano Zampini   *v   = a->cvec;
1684*6947451fSStefano Zampini   PetscFunctionReturn(0);
1685*6947451fSStefano Zampini }
1686*6947451fSStefano Zampini 
1687*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1688*6947451fSStefano Zampini {
1689*6947451fSStefano Zampini   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1690*6947451fSStefano Zampini   PetscErrorCode ierr;
1691*6947451fSStefano Zampini 
1692*6947451fSStefano Zampini   PetscFunctionBegin;
1693*6947451fSStefano Zampini   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1694*6947451fSStefano Zampini   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1695*6947451fSStefano Zampini   a->vecinuse = 0;
1696*6947451fSStefano Zampini   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1697*6947451fSStefano Zampini   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1698*6947451fSStefano Zampini   *v   = NULL;
1699*6947451fSStefano Zampini   PetscFunctionReturn(0);
1700*6947451fSStefano Zampini }
1701*6947451fSStefano Zampini 
17028cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1703273d9f13SBarry Smith {
1704273d9f13SBarry Smith   Mat_MPIDense   *a;
1705dfbe8321SBarry Smith   PetscErrorCode ierr;
1706273d9f13SBarry Smith 
1707273d9f13SBarry Smith   PetscFunctionBegin;
1708b00a9115SJed Brown   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1709b0a32e0cSBarry Smith   mat->data = (void*)a;
1710273d9f13SBarry Smith   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1711273d9f13SBarry Smith 
1712273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1713ce94432eSBarry Smith   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1714ce94432eSBarry Smith   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1715273d9f13SBarry Smith 
1716273d9f13SBarry Smith   /* build cache for off array entries formed */
1717273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
17182205254eSKarl Rupp 
1719ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1720273d9f13SBarry Smith 
1721273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1722273d9f13SBarry Smith   a->lvec        = 0;
1723273d9f13SBarry Smith   a->Mvctx       = 0;
1724273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1725273d9f13SBarry Smith 
172649a6ff4bSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1727bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
17288572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
17298572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
17308572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1731*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
1732*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
1733d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1734d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1735*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
1736*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
1737*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
1738*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
1739*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
1740*6947451fSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
17418baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
17428baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
17438baccfbdSHong Zhang #endif
1744637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1745637a0070SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
1746637a0070SStefano Zampini #endif
1747bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
17484222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
17494222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1750bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1751bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
175252c5f739Sprj-   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
175352c5f739Sprj-   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
17548949adfdSHong Zhang 
17558949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
17568949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1757af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1758af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
175938aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1760273d9f13SBarry Smith   PetscFunctionReturn(0);
1761273d9f13SBarry Smith }
1762273d9f13SBarry Smith 
1763209238afSKris Buschelman /*MC
1764637a0070SStefano Zampini    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1765637a0070SStefano Zampini 
1766637a0070SStefano Zampini    Options Database Keys:
1767637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1768637a0070SStefano Zampini 
1769637a0070SStefano Zampini   Level: beginner
1770637a0070SStefano Zampini 
1771637a0070SStefano Zampini .seealso:
1772637a0070SStefano Zampini 
1773637a0070SStefano Zampini M*/
1774637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1775637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1776637a0070SStefano Zampini {
1777637a0070SStefano Zampini   PetscErrorCode ierr;
1778637a0070SStefano Zampini 
1779637a0070SStefano Zampini   PetscFunctionBegin;
1780637a0070SStefano Zampini   ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
1781637a0070SStefano Zampini   ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
1782637a0070SStefano Zampini   PetscFunctionReturn(0);
1783637a0070SStefano Zampini }
1784637a0070SStefano Zampini #endif
1785637a0070SStefano Zampini 
1786637a0070SStefano Zampini /*MC
1787002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1788209238afSKris Buschelman 
1789209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1790209238afSKris Buschelman    and MATMPIDENSE otherwise.
1791209238afSKris Buschelman 
1792209238afSKris Buschelman    Options Database Keys:
1793209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1794209238afSKris Buschelman 
1795209238afSKris Buschelman   Level: beginner
1796209238afSKris Buschelman 
179701b82886SBarry Smith 
1798*6947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1799*6947451fSStefano Zampini M*/
1800*6947451fSStefano Zampini 
1801*6947451fSStefano Zampini /*MC
1802*6947451fSStefano Zampini    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1803*6947451fSStefano Zampini 
1804*6947451fSStefano Zampini    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1805*6947451fSStefano Zampini    and MATMPIDENSECUDA otherwise.
1806*6947451fSStefano Zampini 
1807*6947451fSStefano Zampini    Options Database Keys:
1808*6947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1809*6947451fSStefano Zampini 
1810*6947451fSStefano Zampini   Level: beginner
1811*6947451fSStefano Zampini 
1812*6947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1813209238afSKris Buschelman M*/
1814209238afSKris Buschelman 
1815273d9f13SBarry Smith /*@C
1816273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1817273d9f13SBarry Smith 
1818273d9f13SBarry Smith    Not collective
1819273d9f13SBarry Smith 
1820273d9f13SBarry Smith    Input Parameters:
18211c4f3114SJed Brown .  B - the matrix
18220298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1823273d9f13SBarry Smith    to control all matrix memory allocation.
1824273d9f13SBarry Smith 
1825273d9f13SBarry Smith    Notes:
1826273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1827273d9f13SBarry Smith    storage by columns.
1828273d9f13SBarry Smith 
1829273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1830273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
18310298fd71SBarry Smith    set data=NULL.
1832273d9f13SBarry Smith 
1833273d9f13SBarry Smith    Level: intermediate
1834273d9f13SBarry Smith 
1835273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1836273d9f13SBarry Smith @*/
18371c4f3114SJed Brown PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1838273d9f13SBarry Smith {
18394ac538c5SBarry Smith   PetscErrorCode ierr;
1840273d9f13SBarry Smith 
1841273d9f13SBarry Smith   PetscFunctionBegin;
18421c4f3114SJed Brown   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1843273d9f13SBarry Smith   PetscFunctionReturn(0);
1844273d9f13SBarry Smith }
1845273d9f13SBarry Smith 
1846d3042a70SBarry Smith /*@
1847637a0070SStefano Zampini    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1848d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1849d3042a70SBarry Smith    into a matrix
1850d3042a70SBarry Smith 
1851d3042a70SBarry Smith    Not Collective
1852d3042a70SBarry Smith 
1853d3042a70SBarry Smith    Input Parameters:
1854d3042a70SBarry Smith +  mat - the matrix
1855d3042a70SBarry Smith -  array - the array in column major order
1856d3042a70SBarry Smith 
1857d3042a70SBarry Smith    Notes:
1858d3042a70SBarry Smith    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1859d3042a70SBarry Smith    freed when the matrix is destroyed.
1860d3042a70SBarry Smith 
1861d3042a70SBarry Smith    Level: developer
1862d3042a70SBarry Smith 
1863d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1864d3042a70SBarry Smith 
1865d3042a70SBarry Smith @*/
1866637a0070SStefano Zampini PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
1867d3042a70SBarry Smith {
1868d3042a70SBarry Smith   PetscErrorCode ierr;
1869637a0070SStefano Zampini 
1870d3042a70SBarry Smith   PetscFunctionBegin;
1871d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1872d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1873637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1874637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_CPU;
1875637a0070SStefano Zampini #endif
1876d3042a70SBarry Smith   PetscFunctionReturn(0);
1877d3042a70SBarry Smith }
1878d3042a70SBarry Smith 
1879d3042a70SBarry Smith /*@
1880d3042a70SBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1881d3042a70SBarry Smith 
1882d3042a70SBarry Smith    Not Collective
1883d3042a70SBarry Smith 
1884d3042a70SBarry Smith    Input Parameters:
1885d3042a70SBarry Smith .  mat - the matrix
1886d3042a70SBarry Smith 
1887d3042a70SBarry Smith    Notes:
1888d3042a70SBarry Smith    You can only call this after a call to MatDensePlaceArray()
1889d3042a70SBarry Smith 
1890d3042a70SBarry Smith    Level: developer
1891d3042a70SBarry Smith 
1892d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1893d3042a70SBarry Smith 
1894d3042a70SBarry Smith @*/
1895d3042a70SBarry Smith PetscErrorCode  MatDenseResetArray(Mat mat)
1896d3042a70SBarry Smith {
1897d3042a70SBarry Smith   PetscErrorCode ierr;
1898637a0070SStefano Zampini 
1899d3042a70SBarry Smith   PetscFunctionBegin;
1900d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1901d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1902d3042a70SBarry Smith   PetscFunctionReturn(0);
1903d3042a70SBarry Smith }
1904d3042a70SBarry Smith 
1905637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
19068965ea79SLois Curfman McInnes /*@C
1907637a0070SStefano Zampini    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1908637a0070SStefano Zampini    array provided by the user. This is useful to avoid copying an array
1909637a0070SStefano Zampini    into a matrix
1910637a0070SStefano Zampini 
1911637a0070SStefano Zampini    Not Collective
1912637a0070SStefano Zampini 
1913637a0070SStefano Zampini    Input Parameters:
1914637a0070SStefano Zampini +  mat - the matrix
1915637a0070SStefano Zampini -  array - the array in column major order
1916637a0070SStefano Zampini 
1917637a0070SStefano Zampini    Notes:
1918637a0070SStefano Zampini    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
1919637a0070SStefano Zampini    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
1920637a0070SStefano Zampini 
1921637a0070SStefano Zampini    Level: developer
1922637a0070SStefano Zampini 
1923637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
1924637a0070SStefano Zampini @*/
1925637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
1926637a0070SStefano Zampini {
1927637a0070SStefano Zampini   PetscErrorCode ierr;
1928637a0070SStefano Zampini 
1929637a0070SStefano Zampini   PetscFunctionBegin;
1930637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1931637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1932637a0070SStefano Zampini   mat->offloadmask = PETSC_OFFLOAD_GPU;
1933637a0070SStefano Zampini   PetscFunctionReturn(0);
1934637a0070SStefano Zampini }
1935637a0070SStefano Zampini 
1936637a0070SStefano Zampini /*@C
1937637a0070SStefano Zampini    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
1938637a0070SStefano Zampini 
1939637a0070SStefano Zampini    Not Collective
1940637a0070SStefano Zampini 
1941637a0070SStefano Zampini    Input Parameters:
1942637a0070SStefano Zampini .  mat - the matrix
1943637a0070SStefano Zampini 
1944637a0070SStefano Zampini    Notes:
1945637a0070SStefano Zampini    You can only call this after a call to MatDenseCUDAPlaceArray()
1946637a0070SStefano Zampini 
1947637a0070SStefano Zampini    Level: developer
1948637a0070SStefano Zampini 
1949637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
1950637a0070SStefano Zampini 
1951637a0070SStefano Zampini @*/
1952637a0070SStefano Zampini PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
1953637a0070SStefano Zampini {
1954637a0070SStefano Zampini   PetscErrorCode ierr;
1955637a0070SStefano Zampini 
1956637a0070SStefano Zampini   PetscFunctionBegin;
1957637a0070SStefano Zampini   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1958637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1959637a0070SStefano Zampini   PetscFunctionReturn(0);
1960637a0070SStefano Zampini }
1961637a0070SStefano Zampini 
1962637a0070SStefano Zampini /*@C
1963637a0070SStefano Zampini    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
1964637a0070SStefano Zampini 
1965637a0070SStefano Zampini    Not Collective
1966637a0070SStefano Zampini 
1967637a0070SStefano Zampini    Input Parameters:
1968637a0070SStefano Zampini .  A - the matrix
1969637a0070SStefano Zampini 
1970637a0070SStefano Zampini    Output Parameters
1971637a0070SStefano Zampini .  array - the GPU array in column major order
1972637a0070SStefano Zampini 
1973637a0070SStefano Zampini    Notes:
1974637a0070SStefano Zampini    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
1975637a0070SStefano Zampini 
1976637a0070SStefano Zampini    Level: developer
1977637a0070SStefano Zampini 
1978637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
1979637a0070SStefano Zampini @*/
1980637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
1981637a0070SStefano Zampini {
1982637a0070SStefano Zampini   PetscErrorCode ierr;
1983637a0070SStefano Zampini 
1984637a0070SStefano Zampini   PetscFunctionBegin;
1985637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
1986637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
1987637a0070SStefano Zampini   PetscFunctionReturn(0);
1988637a0070SStefano Zampini }
1989637a0070SStefano Zampini 
1990637a0070SStefano Zampini /*@C
1991637a0070SStefano Zampini    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
1992637a0070SStefano Zampini 
1993637a0070SStefano Zampini    Not Collective
1994637a0070SStefano Zampini 
1995637a0070SStefano Zampini    Input Parameters:
1996637a0070SStefano Zampini +  A - the matrix
1997637a0070SStefano Zampini -  array - the GPU array in column major order
1998637a0070SStefano Zampini 
1999637a0070SStefano Zampini    Notes:
2000637a0070SStefano Zampini 
2001637a0070SStefano Zampini    Level: developer
2002637a0070SStefano Zampini 
2003637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2004637a0070SStefano Zampini @*/
2005637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2006637a0070SStefano Zampini {
2007637a0070SStefano Zampini   PetscErrorCode ierr;
2008637a0070SStefano Zampini 
2009637a0070SStefano Zampini   PetscFunctionBegin;
2010637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2011637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2012637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2013637a0070SStefano Zampini   PetscFunctionReturn(0);
2014637a0070SStefano Zampini }
2015637a0070SStefano Zampini 
2016637a0070SStefano Zampini /*@C
2017637a0070SStefano Zampini    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2018637a0070SStefano Zampini 
2019637a0070SStefano Zampini    Not Collective
2020637a0070SStefano Zampini 
2021637a0070SStefano Zampini    Input Parameters:
2022637a0070SStefano Zampini .  A - the matrix
2023637a0070SStefano Zampini 
2024637a0070SStefano Zampini    Output Parameters
2025637a0070SStefano Zampini .  array - the GPU array in column major order
2026637a0070SStefano Zampini 
2027637a0070SStefano Zampini    Notes:
2028637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2029637a0070SStefano Zampini 
2030637a0070SStefano Zampini    Level: developer
2031637a0070SStefano Zampini 
2032637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2033637a0070SStefano Zampini @*/
2034637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2035637a0070SStefano Zampini {
2036637a0070SStefano Zampini   PetscErrorCode ierr;
2037637a0070SStefano Zampini 
2038637a0070SStefano Zampini   PetscFunctionBegin;
2039637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2040637a0070SStefano Zampini   PetscFunctionReturn(0);
2041637a0070SStefano Zampini }
2042637a0070SStefano Zampini 
2043637a0070SStefano Zampini /*@C
2044637a0070SStefano Zampini    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2045637a0070SStefano Zampini 
2046637a0070SStefano Zampini    Not Collective
2047637a0070SStefano Zampini 
2048637a0070SStefano Zampini    Input Parameters:
2049637a0070SStefano Zampini +  A - the matrix
2050637a0070SStefano Zampini -  array - the GPU array in column major order
2051637a0070SStefano Zampini 
2052637a0070SStefano Zampini    Notes:
2053637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2054637a0070SStefano Zampini 
2055637a0070SStefano Zampini    Level: developer
2056637a0070SStefano Zampini 
2057637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2058637a0070SStefano Zampini @*/
2059637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2060637a0070SStefano Zampini {
2061637a0070SStefano Zampini   PetscErrorCode ierr;
2062637a0070SStefano Zampini 
2063637a0070SStefano Zampini   PetscFunctionBegin;
2064637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2065637a0070SStefano Zampini   PetscFunctionReturn(0);
2066637a0070SStefano Zampini }
2067637a0070SStefano Zampini 
2068637a0070SStefano Zampini /*@C
2069637a0070SStefano Zampini    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2070637a0070SStefano Zampini 
2071637a0070SStefano Zampini    Not Collective
2072637a0070SStefano Zampini 
2073637a0070SStefano Zampini    Input Parameters:
2074637a0070SStefano Zampini .  A - the matrix
2075637a0070SStefano Zampini 
2076637a0070SStefano Zampini    Output Parameters
2077637a0070SStefano Zampini .  array - the GPU array in column major order
2078637a0070SStefano Zampini 
2079637a0070SStefano Zampini    Notes:
2080637a0070SStefano Zampini    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2081637a0070SStefano Zampini 
2082637a0070SStefano Zampini    Level: developer
2083637a0070SStefano Zampini 
2084637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2085637a0070SStefano Zampini @*/
2086637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2087637a0070SStefano Zampini {
2088637a0070SStefano Zampini   PetscErrorCode ierr;
2089637a0070SStefano Zampini 
2090637a0070SStefano Zampini   PetscFunctionBegin;
2091637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2092637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2093637a0070SStefano Zampini   PetscFunctionReturn(0);
2094637a0070SStefano Zampini }
2095637a0070SStefano Zampini 
2096637a0070SStefano Zampini /*@C
2097637a0070SStefano Zampini    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2098637a0070SStefano Zampini 
2099637a0070SStefano Zampini    Not Collective
2100637a0070SStefano Zampini 
2101637a0070SStefano Zampini    Input Parameters:
2102637a0070SStefano Zampini +  A - the matrix
2103637a0070SStefano Zampini -  array - the GPU array in column major order
2104637a0070SStefano Zampini 
2105637a0070SStefano Zampini    Notes:
2106637a0070SStefano Zampini 
2107637a0070SStefano Zampini    Level: developer
2108637a0070SStefano Zampini 
2109637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2110637a0070SStefano Zampini @*/
2111637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2112637a0070SStefano Zampini {
2113637a0070SStefano Zampini   PetscErrorCode ierr;
2114637a0070SStefano Zampini 
2115637a0070SStefano Zampini   PetscFunctionBegin;
2116637a0070SStefano Zampini   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2117637a0070SStefano Zampini   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2118637a0070SStefano Zampini   A->offloadmask = PETSC_OFFLOAD_GPU;
2119637a0070SStefano Zampini   PetscFunctionReturn(0);
2120637a0070SStefano Zampini }
2121637a0070SStefano Zampini #endif
2122637a0070SStefano Zampini 
2123637a0070SStefano Zampini /*@C
2124637a0070SStefano Zampini    MatCreateDense - Creates a matrix in dense format.
21258965ea79SLois Curfman McInnes 
2126d083f849SBarry Smith    Collective
2127db81eaa0SLois Curfman McInnes 
21288965ea79SLois Curfman McInnes    Input Parameters:
2129db81eaa0SLois Curfman McInnes +  comm - MPI communicator
21308965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2131db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
21328965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2133db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
21346cfe35ddSJose E. Roman -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2135dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
21368965ea79SLois Curfman McInnes 
21378965ea79SLois Curfman McInnes    Output Parameter:
2138477f1c0bSLois Curfman McInnes .  A - the matrix
21398965ea79SLois Curfman McInnes 
2140b259b22eSLois Curfman McInnes    Notes:
214139ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
214239ddd567SLois Curfman McInnes    storage by columns.
21438965ea79SLois Curfman McInnes 
214418f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
214518f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
21466cfe35ddSJose E. Roman    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
214718f449edSLois Curfman McInnes 
21488965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
21498965ea79SLois Curfman McInnes    (possibly both).
21508965ea79SLois Curfman McInnes 
2151027ccd11SLois Curfman McInnes    Level: intermediate
2152027ccd11SLois Curfman McInnes 
215339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
21548965ea79SLois Curfman McInnes @*/
215569b1f4b7SBarry Smith PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
21568965ea79SLois Curfman McInnes {
21576849ba73SBarry Smith   PetscErrorCode ierr;
215813f74950SBarry Smith   PetscMPIInt    size;
21598965ea79SLois Curfman McInnes 
21603a40ed3dSBarry Smith   PetscFunctionBegin;
2161f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
21628491ab44SLisandro Dalcin   PetscValidLogicalCollectiveBool(*A,!!data,6);
2163f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2164273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2165273d9f13SBarry Smith   if (size > 1) {
2166273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2167273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
21686cfe35ddSJose E. Roman     if (data) {  /* user provided data array, so no need to assemble */
21696cfe35ddSJose E. Roman       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
21706cfe35ddSJose E. Roman       (*A)->assembled = PETSC_TRUE;
21716cfe35ddSJose E. Roman     }
2172273d9f13SBarry Smith   } else {
2173273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2174273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
21758c469469SLois Curfman McInnes   }
21763a40ed3dSBarry Smith   PetscFunctionReturn(0);
21778965ea79SLois Curfman McInnes }
21788965ea79SLois Curfman McInnes 
2179637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA)
2180637a0070SStefano Zampini /*@C
2181637a0070SStefano Zampini    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2182637a0070SStefano Zampini 
2183637a0070SStefano Zampini    Collective
2184637a0070SStefano Zampini 
2185637a0070SStefano Zampini    Input Parameters:
2186637a0070SStefano Zampini +  comm - MPI communicator
2187637a0070SStefano Zampini .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2188637a0070SStefano Zampini .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2189637a0070SStefano Zampini .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2190637a0070SStefano Zampini .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2191637a0070SStefano Zampini -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2192637a0070SStefano Zampini    to control matrix memory allocation.
2193637a0070SStefano Zampini 
2194637a0070SStefano Zampini    Output Parameter:
2195637a0070SStefano Zampini .  A - the matrix
2196637a0070SStefano Zampini 
2197637a0070SStefano Zampini    Notes:
2198637a0070SStefano Zampini 
2199637a0070SStefano Zampini    Level: intermediate
2200637a0070SStefano Zampini 
2201637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense()
2202637a0070SStefano Zampini @*/
2203637a0070SStefano Zampini PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2204637a0070SStefano Zampini {
2205637a0070SStefano Zampini   PetscErrorCode ierr;
2206637a0070SStefano Zampini   PetscMPIInt    size;
2207637a0070SStefano Zampini 
2208637a0070SStefano Zampini   PetscFunctionBegin;
2209637a0070SStefano Zampini   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2210637a0070SStefano Zampini   PetscValidLogicalCollectiveBool(*A,!!data,6);
2211637a0070SStefano Zampini   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2212637a0070SStefano Zampini   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2213637a0070SStefano Zampini   if (size > 1) {
2214637a0070SStefano Zampini     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2215637a0070SStefano Zampini     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2216637a0070SStefano Zampini     if (data) {  /* user provided data array, so no need to assemble */
2217637a0070SStefano Zampini       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2218637a0070SStefano Zampini       (*A)->assembled = PETSC_TRUE;
2219637a0070SStefano Zampini     }
2220637a0070SStefano Zampini   } else {
2221637a0070SStefano Zampini     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2222637a0070SStefano Zampini     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2223637a0070SStefano Zampini   }
2224637a0070SStefano Zampini   PetscFunctionReturn(0);
2225637a0070SStefano Zampini }
2226637a0070SStefano Zampini #endif
2227637a0070SStefano Zampini 
22286849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
22298965ea79SLois Curfman McInnes {
22308965ea79SLois Curfman McInnes   Mat            mat;
22313501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2232dfbe8321SBarry Smith   PetscErrorCode ierr;
22338965ea79SLois Curfman McInnes 
22343a40ed3dSBarry Smith   PetscFunctionBegin;
22358965ea79SLois Curfman McInnes   *newmat = 0;
2236ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2237d0f46423SBarry Smith   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
22387adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2239834f8fabSBarry Smith   a       = (Mat_MPIDense*)mat->data;
22405aa7edbeSHong Zhang 
2241d5f3da31SBarry Smith   mat->factortype   = A->factortype;
2242c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
2243273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
22448965ea79SLois Curfman McInnes 
22458965ea79SLois Curfman McInnes   a->size         = oldmat->size;
22468965ea79SLois Curfman McInnes   a->rank         = oldmat->rank;
2247e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
22483782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
2249e04c1aa4SHong Zhang 
22501e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
22511e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
22528965ea79SLois Curfman McInnes 
22535609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
22543bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2255637a0070SStefano Zampini   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
225601b82886SBarry Smith 
22578965ea79SLois Curfman McInnes   *newmat = mat;
22583a40ed3dSBarry Smith   PetscFunctionReturn(0);
22598965ea79SLois Curfman McInnes }
22608965ea79SLois Curfman McInnes 
2261eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2262eb91f321SVaclav Hapla {
2263eb91f321SVaclav Hapla   PetscErrorCode ierr;
226487d5ce66SSatish Balay   PetscBool      isbinary;
226587d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
226687d5ce66SSatish Balay   PetscBool      ishdf5;
226787d5ce66SSatish Balay #endif
2268eb91f321SVaclav Hapla 
2269eb91f321SVaclav Hapla   PetscFunctionBegin;
2270eb91f321SVaclav Hapla   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2271eb91f321SVaclav Hapla   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2272eb91f321SVaclav Hapla   /* force binary viewer to load .info file if it has not yet done so */
2273eb91f321SVaclav Hapla   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2274eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
227587d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5)
2276eb91f321SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
227787d5ce66SSatish Balay #endif
2278eb91f321SVaclav Hapla   if (isbinary) {
22798491ab44SLisandro Dalcin     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2280eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5)
228187d5ce66SSatish Balay   } else if (ishdf5) {
2282eb91f321SVaclav Hapla     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2283eb91f321SVaclav Hapla #endif
228487d5ce66SSatish Balay   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2285eb91f321SVaclav Hapla   PetscFunctionReturn(0);
2286eb91f321SVaclav Hapla }
2287eb91f321SVaclav Hapla 
2288ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
22896e4ee0c6SHong Zhang {
22906e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
22916e4ee0c6SHong Zhang   Mat            a,b;
2292ace3abfcSBarry Smith   PetscBool      flg;
22936e4ee0c6SHong Zhang   PetscErrorCode ierr;
229490ace30eSBarry Smith 
22956e4ee0c6SHong Zhang   PetscFunctionBegin;
22966e4ee0c6SHong Zhang   a    = matA->A;
22976e4ee0c6SHong Zhang   b    = matB->A;
22986e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2299b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
23006e4ee0c6SHong Zhang   PetscFunctionReturn(0);
23016e4ee0c6SHong Zhang }
230290ace30eSBarry Smith 
2303baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
2304baa3c1c6SHong Zhang {
2305baa3c1c6SHong Zhang   PetscErrorCode        ierr;
2306baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2307baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = a->atbdense;
2308baa3c1c6SHong Zhang 
2309baa3c1c6SHong Zhang   PetscFunctionBegin;
2310637a0070SStefano Zampini   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2311637a0070SStefano Zampini   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2312637a0070SStefano Zampini   ierr = (*atb->destroy)(A);CHKERRQ(ierr);
2313baa3c1c6SHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
2314baa3c1c6SHong Zhang   PetscFunctionReturn(0);
2315baa3c1c6SHong Zhang }
2316baa3c1c6SHong Zhang 
2317cc48ffa7SToby Isaac PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
2318cc48ffa7SToby Isaac {
2319cc48ffa7SToby Isaac   PetscErrorCode        ierr;
2320cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2321cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt = a->abtdense;
2322cc48ffa7SToby Isaac 
2323cc48ffa7SToby Isaac   PetscFunctionBegin;
2324cc48ffa7SToby Isaac   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2325faa55883SToby Isaac   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2326cc48ffa7SToby Isaac   ierr = (abt->destroy)(A);CHKERRQ(ierr);
2327cc48ffa7SToby Isaac   ierr = PetscFree(abt);CHKERRQ(ierr);
2328cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2329cc48ffa7SToby Isaac }
2330cc48ffa7SToby Isaac 
2331cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2332cb20be35SHong Zhang {
2333baa3c1c6SHong Zhang   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2334baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = c->atbdense;
2335cb20be35SHong Zhang   PetscErrorCode        ierr;
2336cb20be35SHong Zhang   MPI_Comm              comm;
2337637a0070SStefano Zampini   PetscMPIInt           size,*recvcounts=atb->recvcounts;
2338637a0070SStefano Zampini   PetscScalar           *carray,*sendbuf=atb->sendbuf;
2339637a0070SStefano Zampini   const PetscScalar     *atbarray;
2340d5017740SHong Zhang   PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2341e68c0b26SHong Zhang   const PetscInt        *ranges;
2342cb20be35SHong Zhang 
2343cb20be35SHong Zhang   PetscFunctionBegin;
2344cb20be35SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2345cb20be35SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2346e68c0b26SHong Zhang 
2347c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
2348637a0070SStefano Zampini   ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);
2349cb20be35SHong Zhang 
2350cb20be35SHong Zhang   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
2351c5ef1628SHong Zhang   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2352cb20be35SHong Zhang 
2353660d5466SHong Zhang   /* arrange atbarray into sendbuf */
2354637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2355637a0070SStefano Zampini   for (proc=0, k=0; proc<size; proc++) {
2356baa3c1c6SHong Zhang     for (j=0; j<cN; j++) {
2357c5ef1628SHong Zhang       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2358cb20be35SHong Zhang     }
2359cb20be35SHong Zhang   }
2360637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
2361637a0070SStefano Zampini 
2362c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
2363660d5466SHong Zhang   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
23643462b7efSHong Zhang   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
2365660d5466SHong Zhang   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
2366cb20be35SHong Zhang   PetscFunctionReturn(0);
2367cb20be35SHong Zhang }
2368cb20be35SHong Zhang 
23694222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2370cb20be35SHong Zhang {
2371cb20be35SHong Zhang   PetscErrorCode        ierr;
2372cb20be35SHong Zhang   MPI_Comm              comm;
2373baa3c1c6SHong Zhang   PetscMPIInt           size;
2374660d5466SHong Zhang   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2375baa3c1c6SHong Zhang   Mat_MPIDense          *c;
2376baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
2377cb20be35SHong Zhang 
2378cb20be35SHong Zhang   PetscFunctionBegin;
2379baa3c1c6SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2380cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2381cb20be35SHong Zhang     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2382cb20be35SHong Zhang   }
2383cb20be35SHong Zhang 
23844222ddf1SHong Zhang   /* create matrix product C */
23854222ddf1SHong Zhang   ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
23864222ddf1SHong Zhang   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
238718992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
23884222ddf1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
23894222ddf1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390baa3c1c6SHong Zhang 
23914222ddf1SHong Zhang   /* create data structure for reuse C */
2392baa3c1c6SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2393baa3c1c6SHong Zhang   ierr = PetscNew(&atb);CHKERRQ(ierr);
23944222ddf1SHong Zhang   cM   = C->rmap->N;
2395637a0070SStefano Zampini   ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2396baa3c1c6SHong Zhang 
23974222ddf1SHong Zhang   c               = (Mat_MPIDense*)C->data;
2398baa3c1c6SHong Zhang   c->atbdense     = atb;
23994222ddf1SHong Zhang   atb->destroy    = C->ops->destroy;
24004222ddf1SHong Zhang   C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2401cb20be35SHong Zhang   PetscFunctionReturn(0);
2402cb20be35SHong Zhang }
2403cb20be35SHong Zhang 
24044222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2405cb20be35SHong Zhang {
2406cb20be35SHong Zhang   PetscErrorCode        ierr;
2407cc48ffa7SToby Isaac   MPI_Comm              comm;
2408cc48ffa7SToby Isaac   PetscMPIInt           i, size;
2409cc48ffa7SToby Isaac   PetscInt              maxRows, bufsiz;
2410cc48ffa7SToby Isaac   Mat_MPIDense          *c;
2411cc48ffa7SToby Isaac   PetscMPIInt           tag;
24124222ddf1SHong Zhang   PetscInt              alg;
2413cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
24144222ddf1SHong Zhang   Mat_Product           *product = C->product;
24154222ddf1SHong Zhang   PetscBool             flg;
2416cc48ffa7SToby Isaac 
2417cc48ffa7SToby Isaac   PetscFunctionBegin;
24184222ddf1SHong Zhang   /* check local size of A and B */
2419637a0070SStefano Zampini   if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2420cc48ffa7SToby Isaac 
24214222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
2422637a0070SStefano Zampini   alg  = flg ? 0 : 1;
2423cc48ffa7SToby Isaac 
24244222ddf1SHong Zhang   /* setup matrix product C */
24254222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
24264222ddf1SHong Zhang   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
242718992e5dSStefano Zampini   ierr = MatSetUp(C);CHKERRQ(ierr);
24284222ddf1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
24294222ddf1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24304222ddf1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24314222ddf1SHong Zhang 
24324222ddf1SHong Zhang   /* create data structure for reuse C */
24334222ddf1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
2434cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2435cc48ffa7SToby Isaac   ierr = PetscNew(&abt);CHKERRQ(ierr);
2436cc48ffa7SToby Isaac   abt->tag = tag;
2437faa55883SToby Isaac   abt->alg = alg;
2438faa55883SToby Isaac   switch (alg) {
24394222ddf1SHong Zhang   case 1: /* alg: "cyclic" */
2440cc48ffa7SToby Isaac     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2441cc48ffa7SToby Isaac     bufsiz = A->cmap->N * maxRows;
2442cc48ffa7SToby Isaac     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2443faa55883SToby Isaac     break;
24444222ddf1SHong Zhang   default: /* alg: "allgatherv" */
2445faa55883SToby Isaac     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2446faa55883SToby Isaac     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2447faa55883SToby Isaac     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2448faa55883SToby Isaac     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2449faa55883SToby Isaac     break;
2450faa55883SToby Isaac   }
2451cc48ffa7SToby Isaac 
24524222ddf1SHong Zhang   c                               = (Mat_MPIDense*)C->data;
2453cc48ffa7SToby Isaac   c->abtdense                     = abt;
24544222ddf1SHong Zhang   abt->destroy                    = C->ops->destroy;
24554222ddf1SHong Zhang   C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
24564222ddf1SHong Zhang   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2457cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2458cc48ffa7SToby Isaac }
2459cc48ffa7SToby Isaac 
2460faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2461cc48ffa7SToby Isaac {
2462cc48ffa7SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2463cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt = c->abtdense;
2464cc48ffa7SToby Isaac   PetscErrorCode        ierr;
2465cc48ffa7SToby Isaac   MPI_Comm              comm;
2466cc48ffa7SToby Isaac   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2467637a0070SStefano Zampini   PetscScalar           *sendbuf, *recvbuf=0, *cv;
2468cc48ffa7SToby Isaac   PetscInt              i,cK=A->cmap->N,k,j,bn;
2469cc48ffa7SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2470637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2471637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, blda, clda;
2472cc48ffa7SToby Isaac   MPI_Request           reqs[2];
2473cc48ffa7SToby Isaac   const PetscInt        *ranges;
2474cc48ffa7SToby Isaac 
2475cc48ffa7SToby Isaac   PetscFunctionBegin;
2476cc48ffa7SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2477cc48ffa7SToby Isaac   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2478cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2479637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2480637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2481637a0070SStefano Zampini   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2482637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2483637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2484637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
2485637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
2486637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2487637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2488cc48ffa7SToby Isaac   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2489cc48ffa7SToby Isaac   bn   = B->rmap->n;
2490637a0070SStefano Zampini   if (blda == bn) {
2491637a0070SStefano Zampini     sendbuf = (PetscScalar*)bv;
2492cc48ffa7SToby Isaac   } else {
2493cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2494cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2495cc48ffa7SToby Isaac       for (j = 0; j < bn; j++, k++) {
2496637a0070SStefano Zampini         sendbuf[k] = bv[i * blda + j];
2497cc48ffa7SToby Isaac       }
2498cc48ffa7SToby Isaac     }
2499cc48ffa7SToby Isaac   }
2500cc48ffa7SToby Isaac   if (size > 1) {
2501cc48ffa7SToby Isaac     sendto = (rank + size - 1) % size;
2502cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2503cc48ffa7SToby Isaac   } else {
2504cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2505cc48ffa7SToby Isaac   }
2506cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2507cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2508cc48ffa7SToby Isaac   recvisfrom = rank;
2509cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2510cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2511cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2512cc48ffa7SToby Isaac     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2513cc48ffa7SToby Isaac 
2514cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2515cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2516cc48ffa7SToby Isaac       sendsiz = cK * bn;
2517cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2518cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2519cc48ffa7SToby Isaac       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2520cc48ffa7SToby Isaac       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2521cc48ffa7SToby Isaac     }
2522cc48ffa7SToby Isaac 
2523cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
2524cc48ffa7SToby Isaac     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2525637a0070SStefano Zampini     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2526cc48ffa7SToby Isaac 
2527cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2528cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2529cc48ffa7SToby Isaac       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2530cc48ffa7SToby Isaac     }
2531cc48ffa7SToby Isaac     bn = nextbn;
2532cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2533cc48ffa7SToby Isaac     sendbuf = recvbuf;
2534cc48ffa7SToby Isaac   }
2535637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2536637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2537637a0070SStefano Zampini   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2538cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2539cc48ffa7SToby Isaac }
2540cc48ffa7SToby Isaac 
2541faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2542faa55883SToby Isaac {
2543faa55883SToby Isaac   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2544faa55883SToby Isaac   Mat_MatTransMultDense *abt = c->abtdense;
2545faa55883SToby Isaac   PetscErrorCode        ierr;
2546faa55883SToby Isaac   MPI_Comm              comm;
2547637a0070SStefano Zampini   PetscMPIInt           size;
2548637a0070SStefano Zampini   PetscScalar           *cv, *sendbuf, *recvbuf;
2549637a0070SStefano Zampini   const PetscScalar     *av,*bv;
2550637a0070SStefano Zampini   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2551faa55883SToby Isaac   PetscScalar           _DOne=1.0,_DZero=0.0;
2552637a0070SStefano Zampini   PetscBLASInt          cm, cn, ck, alda, clda;
2553faa55883SToby Isaac 
2554faa55883SToby Isaac   PetscFunctionBegin;
2555faa55883SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2556faa55883SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2558637a0070SStefano Zampini   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2559637a0070SStefano Zampini   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2560637a0070SStefano Zampini   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2561637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2562637a0070SStefano Zampini   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2563637a0070SStefano Zampini   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2564637a0070SStefano Zampini   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2565faa55883SToby Isaac   /* copy transpose of B into buf[0] */
2566faa55883SToby Isaac   bn      = B->rmap->n;
2567faa55883SToby Isaac   sendbuf = abt->buf[0];
2568faa55883SToby Isaac   recvbuf = abt->buf[1];
2569faa55883SToby Isaac   for (k = 0, j = 0; j < bn; j++) {
2570faa55883SToby Isaac     for (i = 0; i < cK; i++, k++) {
2571637a0070SStefano Zampini       sendbuf[k] = bv[i * blda + j];
2572faa55883SToby Isaac     }
2573faa55883SToby Isaac   }
2574637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2575faa55883SToby Isaac   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2576faa55883SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2577faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2578faa55883SToby Isaac   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2579637a0070SStefano Zampini   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2580637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2581637a0070SStefano Zampini   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2582637a0070SStefano Zampini   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2583faa55883SToby Isaac   PetscFunctionReturn(0);
2584faa55883SToby Isaac }
2585faa55883SToby Isaac 
2586faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2587faa55883SToby Isaac {
2588faa55883SToby Isaac   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2589faa55883SToby Isaac   Mat_MatTransMultDense *abt = c->abtdense;
2590faa55883SToby Isaac   PetscErrorCode        ierr;
2591faa55883SToby Isaac 
2592faa55883SToby Isaac   PetscFunctionBegin;
2593faa55883SToby Isaac   switch (abt->alg) {
2594faa55883SToby Isaac   case 1:
2595faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2596faa55883SToby Isaac     break;
2597faa55883SToby Isaac   default:
2598faa55883SToby Isaac     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2599faa55883SToby Isaac     break;
2600faa55883SToby Isaac   }
2601faa55883SToby Isaac   PetscFunctionReturn(0);
2602faa55883SToby Isaac }
2603faa55883SToby Isaac 
2604320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2605320f2790SHong Zhang {
2606320f2790SHong Zhang   PetscErrorCode   ierr;
2607320f2790SHong Zhang   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2608320f2790SHong Zhang   Mat_MatMultDense *ab = a->abdense;
2609320f2790SHong Zhang 
2610320f2790SHong Zhang   PetscFunctionBegin;
2611320f2790SHong Zhang   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2612320f2790SHong Zhang   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2613320f2790SHong Zhang   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2614320f2790SHong Zhang 
2615320f2790SHong Zhang   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2616320f2790SHong Zhang   ierr = PetscFree(ab);CHKERRQ(ierr);
2617320f2790SHong Zhang   PetscFunctionReturn(0);
2618320f2790SHong Zhang }
2619320f2790SHong Zhang 
26205242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
2621320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2622320f2790SHong Zhang {
2623320f2790SHong Zhang   PetscErrorCode   ierr;
2624320f2790SHong Zhang   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2625320f2790SHong Zhang   Mat_MatMultDense *ab=c->abdense;
2626320f2790SHong Zhang 
2627320f2790SHong Zhang   PetscFunctionBegin;
2628de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2629de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
26304222ddf1SHong Zhang   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2631de0a22f0SHong Zhang   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2632320f2790SHong Zhang   PetscFunctionReturn(0);
2633320f2790SHong Zhang }
2634320f2790SHong Zhang 
26354222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2636320f2790SHong Zhang {
2637320f2790SHong Zhang   PetscErrorCode   ierr;
2638320f2790SHong Zhang   Mat              Ae,Be,Ce;
2639320f2790SHong Zhang   Mat_MPIDense     *c;
2640320f2790SHong Zhang   Mat_MatMultDense *ab;
2641320f2790SHong Zhang 
2642320f2790SHong Zhang   PetscFunctionBegin;
26434222ddf1SHong Zhang   /* check local size of A and B */
2644320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2645378336b6SHong Zhang     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2646320f2790SHong Zhang   }
2647320f2790SHong Zhang 
26484222ddf1SHong Zhang   /* create elemental matrices Ae and Be */
26494222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
26504222ddf1SHong Zhang   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
26514222ddf1SHong Zhang   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
26524222ddf1SHong Zhang   ierr = MatSetUp(Ae);CHKERRQ(ierr);
26534222ddf1SHong Zhang   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2654320f2790SHong Zhang 
26554222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
26564222ddf1SHong Zhang   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
26574222ddf1SHong Zhang   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
26584222ddf1SHong Zhang   ierr = MatSetUp(Be);CHKERRQ(ierr);
26594222ddf1SHong Zhang   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2660320f2790SHong Zhang 
26614222ddf1SHong Zhang   /* compute symbolic Ce = Ae*Be */
26624222ddf1SHong Zhang   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
26634222ddf1SHong Zhang   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
26644222ddf1SHong Zhang 
26654222ddf1SHong Zhang   /* setup C */
26664222ddf1SHong Zhang   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
26674222ddf1SHong Zhang   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
26684222ddf1SHong Zhang   ierr = MatSetUp(C);CHKERRQ(ierr);
2669320f2790SHong Zhang 
2670320f2790SHong Zhang   /* create data structure for reuse Cdense */
2671320f2790SHong Zhang   ierr = PetscNew(&ab);CHKERRQ(ierr);
26724222ddf1SHong Zhang   c                  = (Mat_MPIDense*)C->data;
2673320f2790SHong Zhang   c->abdense         = ab;
2674320f2790SHong Zhang 
2675320f2790SHong Zhang   ab->Ae             = Ae;
2676320f2790SHong Zhang   ab->Be             = Be;
2677320f2790SHong Zhang   ab->Ce             = Ce;
26784222ddf1SHong Zhang   ab->destroy        = C->ops->destroy;
26794222ddf1SHong Zhang   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
26804222ddf1SHong Zhang   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
26814222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_AB;
2682320f2790SHong Zhang   PetscFunctionReturn(0);
2683320f2790SHong Zhang }
26844222ddf1SHong Zhang #endif
26854222ddf1SHong Zhang /* ----------------------------------------------- */
26864222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
26874222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2688320f2790SHong Zhang {
2689320f2790SHong Zhang   PetscFunctionBegin;
26904222ddf1SHong Zhang   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
26914222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AB;
26924222ddf1SHong Zhang   C->ops->productnumeric  = MatProductNumeric_AB;
2693320f2790SHong Zhang   PetscFunctionReturn(0);
2694320f2790SHong Zhang }
26955242a7b1SHong Zhang #endif
269686aefd0dSHong Zhang 
26974222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
26984222ddf1SHong Zhang {
26994222ddf1SHong Zhang   PetscErrorCode ierr;
27004222ddf1SHong Zhang   Mat_Product    *product = C->product;
27014222ddf1SHong Zhang 
27024222ddf1SHong Zhang   PetscFunctionBegin;
27034222ddf1SHong Zhang   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
27044222ddf1SHong Zhang   C->ops->productnumeric          = MatProductNumeric_AtB;
27054222ddf1SHong Zhang   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
27064222ddf1SHong Zhang   PetscFunctionReturn(0);
27074222ddf1SHong Zhang }
27084222ddf1SHong Zhang 
27094222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
27104222ddf1SHong Zhang {
27114222ddf1SHong Zhang   Mat_Product *product = C->product;
27124222ddf1SHong Zhang   Mat         A = product->A,B=product->B;
27134222ddf1SHong Zhang 
27144222ddf1SHong Zhang   PetscFunctionBegin;
27154222ddf1SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
27164222ddf1SHong Zhang     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
27174222ddf1SHong Zhang 
27184222ddf1SHong Zhang   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
27194222ddf1SHong Zhang   PetscFunctionReturn(0);
27204222ddf1SHong Zhang }
27214222ddf1SHong Zhang 
27224222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
27234222ddf1SHong Zhang {
27244222ddf1SHong Zhang   PetscErrorCode ierr;
27254222ddf1SHong Zhang   Mat_Product    *product = C->product;
27264222ddf1SHong Zhang   const char     *algTypes[2] = {"allgatherv","cyclic"};
27274222ddf1SHong Zhang   PetscInt       alg,nalg = 2;
27284222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
27294222ddf1SHong Zhang 
27304222ddf1SHong Zhang   PetscFunctionBegin;
27314222ddf1SHong Zhang   /* Set default algorithm */
27324222ddf1SHong Zhang   alg = 0; /* default is allgatherv */
27334222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
27344222ddf1SHong Zhang   if (flg) {
27354222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
27364222ddf1SHong Zhang   }
27374222ddf1SHong Zhang 
27384222ddf1SHong Zhang   /* Get runtime option */
27394222ddf1SHong Zhang   if (product->api_user) {
27404222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
27414222ddf1SHong Zhang     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
27424222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
27434222ddf1SHong Zhang   } else {
27444222ddf1SHong Zhang     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
27454222ddf1SHong Zhang     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
27464222ddf1SHong Zhang     ierr = PetscOptionsEnd();CHKERRQ(ierr);
27474222ddf1SHong Zhang   }
27484222ddf1SHong Zhang   if (flg) {
27494222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
27504222ddf1SHong Zhang   }
27514222ddf1SHong Zhang 
27524222ddf1SHong Zhang   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
27534222ddf1SHong Zhang   C->ops->productsymbolic          = MatProductSymbolic_ABt;
27544222ddf1SHong Zhang   C->ops->productnumeric           = MatProductNumeric_ABt;
27554222ddf1SHong Zhang   PetscFunctionReturn(0);
27564222ddf1SHong Zhang }
27574222ddf1SHong Zhang 
27584222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
27594222ddf1SHong Zhang {
27604222ddf1SHong Zhang   PetscErrorCode ierr;
27614222ddf1SHong Zhang   Mat_Product    *product = C->product;
27624222ddf1SHong Zhang 
27634222ddf1SHong Zhang   PetscFunctionBegin;
27644222ddf1SHong Zhang   switch (product->type) {
27654222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
27664222ddf1SHong Zhang   case MATPRODUCT_AB:
27674222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
27684222ddf1SHong Zhang     break;
27694222ddf1SHong Zhang #endif
27704222ddf1SHong Zhang   case MATPRODUCT_AtB:
27714222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
27724222ddf1SHong Zhang     break;
27734222ddf1SHong Zhang   case MATPRODUCT_ABt:
27744222ddf1SHong Zhang     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
27754222ddf1SHong Zhang     break;
2776544a5e07SHong Zhang   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
27774222ddf1SHong Zhang   }
27784222ddf1SHong Zhang   PetscFunctionReturn(0);
27794222ddf1SHong Zhang }
2780