xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cc48ffa724f913e7340d1ed3a7bdcad04ee50d62)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
604fea9ffSBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
9baa3c1c6SHong Zhang #include <petscblaslapack.h>
108965ea79SLois Curfman McInnes 
11ab92ecdeSBarry Smith /*@
12ab92ecdeSBarry Smith 
13ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
15ab92ecdeSBarry Smith 
16ab92ecdeSBarry Smith     Input Parameter:
17ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
18ab92ecdeSBarry Smith 
19ab92ecdeSBarry Smith     Output Parameter:
20ab92ecdeSBarry Smith .      B - the inner matrix
21ab92ecdeSBarry Smith 
228e6c10adSSatish Balay     Level: intermediate
238e6c10adSSatish Balay 
24ab92ecdeSBarry Smith @*/
25ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26ab92ecdeSBarry Smith {
27ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28ab92ecdeSBarry Smith   PetscErrorCode ierr;
29ace3abfcSBarry Smith   PetscBool      flg;
30ab92ecdeSBarry Smith 
31ab92ecdeSBarry Smith   PetscFunctionBegin;
32251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
332205254eSKarl Rupp   if (flg) *B = mat->A;
342205254eSKarl Rupp   else *B = A;
35ab92ecdeSBarry Smith   PetscFunctionReturn(0);
36ab92ecdeSBarry Smith }
37ab92ecdeSBarry Smith 
38ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39ba8c8a56SBarry Smith {
40ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41ba8c8a56SBarry Smith   PetscErrorCode ierr;
42d0f46423SBarry Smith   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43ba8c8a56SBarry Smith 
44ba8c8a56SBarry Smith   PetscFunctionBegin;
45e7e72b3dSBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46ba8c8a56SBarry Smith   lrow = row - rstart;
47ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48ba8c8a56SBarry Smith   PetscFunctionReturn(0);
49ba8c8a56SBarry Smith }
50ba8c8a56SBarry Smith 
51ba8c8a56SBarry Smith PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52ba8c8a56SBarry Smith {
53ba8c8a56SBarry Smith   PetscErrorCode ierr;
54ba8c8a56SBarry Smith 
55ba8c8a56SBarry Smith   PetscFunctionBegin;
56ba8c8a56SBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57ba8c8a56SBarry Smith   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58ba8c8a56SBarry Smith   PetscFunctionReturn(0);
59ba8c8a56SBarry Smith }
60ba8c8a56SBarry Smith 
6111bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
620de54da6SSatish Balay {
630de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
646849ba73SBarry Smith   PetscErrorCode ierr;
65d0f46423SBarry Smith   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
6687828ca2SBarry Smith   PetscScalar    *array;
670de54da6SSatish Balay   MPI_Comm       comm;
6811bd1e4dSLisandro Dalcin   Mat            B;
690de54da6SSatish Balay 
700de54da6SSatish Balay   PetscFunctionBegin;
71e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
720de54da6SSatish Balay 
7311bd1e4dSLisandro Dalcin   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
7411bd1e4dSLisandro Dalcin   if (!B) {
750de54da6SSatish Balay     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
7611bd1e4dSLisandro Dalcin     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
7711bd1e4dSLisandro Dalcin     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
7811bd1e4dSLisandro Dalcin     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
798c778c55SBarry Smith     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
8011bd1e4dSLisandro Dalcin     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
818c778c55SBarry Smith     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
8211bd1e4dSLisandro Dalcin     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8311bd1e4dSLisandro Dalcin     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8411bd1e4dSLisandro Dalcin     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
8511bd1e4dSLisandro Dalcin     *a   = B;
8611bd1e4dSLisandro Dalcin     ierr = MatDestroy(&B);CHKERRQ(ierr);
872205254eSKarl Rupp   } else *a = B;
880de54da6SSatish Balay   PetscFunctionReturn(0);
890de54da6SSatish Balay }
900de54da6SSatish Balay 
9113f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
928965ea79SLois Curfman McInnes {
9339b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94dfbe8321SBarry Smith   PetscErrorCode ierr;
95d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96ace3abfcSBarry Smith   PetscBool      roworiented = A->roworiented;
978965ea79SLois Curfman McInnes 
983a40ed3dSBarry Smith   PetscFunctionBegin;
998965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1005ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
101e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1028965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1038965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
10439b7565bSBarry Smith       if (roworiented) {
10539b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1063a40ed3dSBarry Smith       } else {
1078965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1085ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
109e32f2f54SBarry Smith           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
11039b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
11139b7565bSBarry Smith         }
1128965ea79SLois Curfman McInnes       }
1132205254eSKarl Rupp     } else if (!A->donotstash) {
1145080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
11539b7565bSBarry Smith       if (roworiented) {
116b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117d36fbae8SSatish Balay       } else {
118b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
11939b7565bSBarry Smith       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes   }
1223a40ed3dSBarry Smith   PetscFunctionReturn(0);
123b49de8d1SLois Curfman McInnes }
124b49de8d1SLois Curfman McInnes 
12513f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126b49de8d1SLois Curfman McInnes {
127b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128dfbe8321SBarry Smith   PetscErrorCode ierr;
129d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130b49de8d1SLois Curfman McInnes 
1313a40ed3dSBarry Smith   PetscFunctionBegin;
132b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
133e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
136b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
137b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
138e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139e7e72b3dSBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141b49de8d1SLois Curfman McInnes       }
142e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
1438965ea79SLois Curfman McInnes   }
1443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1458965ea79SLois Curfman McInnes }
1468965ea79SLois Curfman McInnes 
147d3042a70SBarry Smith static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148ff14e315SSatish Balay {
149ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150dfbe8321SBarry Smith   PetscErrorCode ierr;
151ff14e315SSatish Balay 
1523a40ed3dSBarry Smith   PetscFunctionBegin;
1538c778c55SBarry Smith   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
1543a40ed3dSBarry Smith   PetscFunctionReturn(0);
155ff14e315SSatish Balay }
156ff14e315SSatish Balay 
1578572280aSBarry Smith static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
1588572280aSBarry Smith {
1598572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1608572280aSBarry Smith   PetscErrorCode ierr;
1618572280aSBarry Smith 
1628572280aSBarry Smith   PetscFunctionBegin;
1638572280aSBarry Smith   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
1648572280aSBarry Smith   PetscFunctionReturn(0);
1658572280aSBarry Smith }
1668572280aSBarry Smith 
167d3042a70SBarry Smith static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168d3042a70SBarry Smith {
169d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170d3042a70SBarry Smith   PetscErrorCode ierr;
171d3042a70SBarry Smith 
172d3042a70SBarry Smith   PetscFunctionBegin;
173d3042a70SBarry Smith   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
174d3042a70SBarry Smith   PetscFunctionReturn(0);
175d3042a70SBarry Smith }
176d3042a70SBarry Smith 
177d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178d3042a70SBarry Smith {
179d3042a70SBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180d3042a70SBarry Smith   PetscErrorCode ierr;
181d3042a70SBarry Smith 
182d3042a70SBarry Smith   PetscFunctionBegin;
183d3042a70SBarry Smith   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
184d3042a70SBarry Smith   PetscFunctionReturn(0);
185d3042a70SBarry Smith }
186d3042a70SBarry Smith 
1877dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188ca3fa75bSLois Curfman McInnes {
189ca3fa75bSLois Curfman McInnes   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190ca3fa75bSLois Curfman McInnes   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
1916849ba73SBarry Smith   PetscErrorCode ierr;
1924aa3045dSJed Brown   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
1935d0c19d7SBarry Smith   const PetscInt *irow,*icol;
19487828ca2SBarry Smith   PetscScalar    *av,*bv,*v = lmat->v;
195ca3fa75bSLois Curfman McInnes   Mat            newmat;
1964aa3045dSJed Brown   IS             iscol_local;
19742a884f0SBarry Smith   MPI_Comm       comm_is,comm_mat;
198ca3fa75bSLois Curfman McInnes 
199ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
20042a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
20142a884f0SBarry Smith   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
20242a884f0SBarry Smith   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
20342a884f0SBarry Smith 
2044aa3045dSJed Brown   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
205ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
2064aa3045dSJed Brown   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
207b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
208b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
2094aa3045dSJed Brown   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
210ca3fa75bSLois Curfman McInnes 
211ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
2127eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
2137eba5e9cSLois Curfman McInnes      original matrix! */
214ca3fa75bSLois Curfman McInnes 
215ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
2167eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
217ca3fa75bSLois Curfman McInnes 
218ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
219ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
220e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2217eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
222ca3fa75bSLois Curfman McInnes     newmat = *B;
223ca3fa75bSLois Curfman McInnes   } else {
224ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
225ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
2264aa3045dSJed Brown     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
2277adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2280298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
229ca3fa75bSLois Curfman McInnes   }
230ca3fa75bSLois Curfman McInnes 
231ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
232ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
233ca3fa75bSLois Curfman McInnes   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
234ca3fa75bSLois Curfman McInnes 
2354aa3045dSJed Brown   for (i=0; i<Ncols; i++) {
23625a33276SHong Zhang     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2387eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
239ca3fa75bSLois Curfman McInnes     }
240ca3fa75bSLois Curfman McInnes   }
241ca3fa75bSLois Curfman McInnes 
242ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
243ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
245ca3fa75bSLois Curfman McInnes 
246ca3fa75bSLois Curfman McInnes   /* Free work space */
247ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
2485bdf786aSShri Abhyankar   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
24932bb1f2dSLisandro Dalcin   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
250ca3fa75bSLois Curfman McInnes   *B   = newmat;
251ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
252ca3fa75bSLois Curfman McInnes }
253ca3fa75bSLois Curfman McInnes 
2548c778c55SBarry Smith PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255ff14e315SSatish Balay {
25673a71a0fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
25773a71a0fSBarry Smith   PetscErrorCode ierr;
25873a71a0fSBarry Smith 
2593a40ed3dSBarry Smith   PetscFunctionBegin;
2608c778c55SBarry Smith   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
2613a40ed3dSBarry Smith   PetscFunctionReturn(0);
262ff14e315SSatish Balay }
263ff14e315SSatish Balay 
2648572280aSBarry Smith PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
2658572280aSBarry Smith {
2668572280aSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
2678572280aSBarry Smith   PetscErrorCode ierr;
2688572280aSBarry Smith 
2698572280aSBarry Smith   PetscFunctionBegin;
2708572280aSBarry Smith   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
2718572280aSBarry Smith   PetscFunctionReturn(0);
2728572280aSBarry Smith }
2738572280aSBarry Smith 
274dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
2758965ea79SLois Curfman McInnes {
27639ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
277ce94432eSBarry Smith   MPI_Comm       comm;
278dfbe8321SBarry Smith   PetscErrorCode ierr;
27913f74950SBarry Smith   PetscInt       nstash,reallocs;
2808965ea79SLois Curfman McInnes   InsertMode     addv;
2818965ea79SLois Curfman McInnes 
2823a40ed3dSBarry Smith   PetscFunctionBegin;
283ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2848965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
285b2566f29SBarry Smith   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
286ce94432eSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
2888965ea79SLois Curfman McInnes 
289d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
2908798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
291ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
2923a40ed3dSBarry Smith   PetscFunctionReturn(0);
2938965ea79SLois Curfman McInnes }
2948965ea79SLois Curfman McInnes 
295dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
2968965ea79SLois Curfman McInnes {
29739ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
2986849ba73SBarry Smith   PetscErrorCode ierr;
29913f74950SBarry Smith   PetscInt       i,*row,*col,flg,j,rstart,ncols;
30013f74950SBarry Smith   PetscMPIInt    n;
30187828ca2SBarry Smith   PetscScalar    *val;
3028965ea79SLois Curfman McInnes 
3033a40ed3dSBarry Smith   PetscFunctionBegin;
3048965ea79SLois Curfman McInnes   /*  wait on receives */
3057ef1d9bdSSatish Balay   while (1) {
3068798bf22SSatish Balay     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
3077ef1d9bdSSatish Balay     if (!flg) break;
3088965ea79SLois Curfman McInnes 
3097ef1d9bdSSatish Balay     for (i=0; i<n;) {
3107ef1d9bdSSatish Balay       /* Now identify the consecutive vals belonging to the same row */
3112205254eSKarl Rupp       for (j=i,rstart=row[j]; j<n; j++) {
3122205254eSKarl Rupp         if (row[j] != rstart) break;
3132205254eSKarl Rupp       }
3147ef1d9bdSSatish Balay       if (j < n) ncols = j-i;
3157ef1d9bdSSatish Balay       else       ncols = n-i;
3167ef1d9bdSSatish Balay       /* Now assemble all these values with a single function call */
3174b4eb8d3SJed Brown       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
3187ef1d9bdSSatish Balay       i    = j;
3198965ea79SLois Curfman McInnes     }
3207ef1d9bdSSatish Balay   }
3218798bf22SSatish Balay   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
3228965ea79SLois Curfman McInnes 
32339ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
32439ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
3258965ea79SLois Curfman McInnes 
3266d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
32739ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
3288965ea79SLois Curfman McInnes   }
3293a40ed3dSBarry Smith   PetscFunctionReturn(0);
3308965ea79SLois Curfman McInnes }
3318965ea79SLois Curfman McInnes 
332dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
3338965ea79SLois Curfman McInnes {
334dfbe8321SBarry Smith   PetscErrorCode ierr;
33539ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
3363a40ed3dSBarry Smith 
3373a40ed3dSBarry Smith   PetscFunctionBegin;
3383a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
3393a40ed3dSBarry Smith   PetscFunctionReturn(0);
3408965ea79SLois Curfman McInnes }
3418965ea79SLois Curfman McInnes 
3428965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
3438965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
3448965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
3453501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
3468965ea79SLois Curfman McInnes    routine.
3478965ea79SLois Curfman McInnes */
3482b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
3498965ea79SLois Curfman McInnes {
35039ddd567SLois Curfman McInnes   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
3516849ba73SBarry Smith   PetscErrorCode    ierr;
352d0f46423SBarry Smith   PetscInt          i,*owners = A->rmap->range;
35376ec1555SBarry Smith   PetscInt          *sizes,j,idx,nsends;
35413f74950SBarry Smith   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
3557adad957SLisandro Dalcin   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
35613f74950SBarry Smith   PetscInt          *lens,*lrows,*values;
35713f74950SBarry Smith   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
358ce94432eSBarry Smith   MPI_Comm          comm;
3598965ea79SLois Curfman McInnes   MPI_Request       *send_waits,*recv_waits;
3608965ea79SLois Curfman McInnes   MPI_Status        recv_status,*send_status;
361ace3abfcSBarry Smith   PetscBool         found;
36297b48c8fSBarry Smith   const PetscScalar *xx;
36397b48c8fSBarry Smith   PetscScalar       *bb;
3648965ea79SLois Curfman McInnes 
3653a40ed3dSBarry Smith   PetscFunctionBegin;
366ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
367ce94432eSBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368b9679d65SBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
3698965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
370f628708eSJed Brown   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
371037dbc42SBarry Smith   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
3728965ea79SLois Curfman McInnes   for (i=0; i<N; i++) {
3738965ea79SLois Curfman McInnes     idx   = rows[i];
37435d8aa7fSBarry Smith     found = PETSC_FALSE;
3758965ea79SLois Curfman McInnes     for (j=0; j<size; j++) {
3768965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
37776ec1555SBarry Smith         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
3788965ea79SLois Curfman McInnes       }
3798965ea79SLois Curfman McInnes     }
380e32f2f54SBarry Smith     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
3818965ea79SLois Curfman McInnes   }
3822205254eSKarl Rupp   nsends = 0;
38376ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
3848965ea79SLois Curfman McInnes 
3858965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
38676ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
3878965ea79SLois Curfman McInnes 
3888965ea79SLois Curfman McInnes   /* post receives:   */
389785e854fSJed Brown   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
390854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
3918965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
39213f74950SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3938965ea79SLois Curfman McInnes   }
3948965ea79SLois Curfman McInnes 
3958965ea79SLois Curfman McInnes   /* do sends:
3968965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
3978965ea79SLois Curfman McInnes          the ith processor
3988965ea79SLois Curfman McInnes   */
399854ce69bSBarry Smith   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
400854ce69bSBarry Smith   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
401854ce69bSBarry Smith   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
4028965ea79SLois Curfman McInnes 
4038965ea79SLois Curfman McInnes   starts[0] = 0;
40476ec1555SBarry Smith   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
4052205254eSKarl Rupp   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
4062205254eSKarl Rupp 
4072205254eSKarl Rupp   starts[0] = 0;
40876ec1555SBarry Smith   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
4098965ea79SLois Curfman McInnes   count = 0;
4108965ea79SLois Curfman McInnes   for (i=0; i<size; i++) {
41176ec1555SBarry Smith     if (sizes[2*i+1]) {
41276ec1555SBarry Smith       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
4138965ea79SLois Curfman McInnes     }
4148965ea79SLois Curfman McInnes   }
415606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
4168965ea79SLois Curfman McInnes 
4178965ea79SLois Curfman McInnes   base = owners[rank];
4188965ea79SLois Curfman McInnes 
4198965ea79SLois Curfman McInnes   /*  wait on receives */
420dcca6d9dSJed Brown   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
42174ed9c26SBarry Smith   count = nrecvs;
42274ed9c26SBarry Smith   slen  = 0;
4238965ea79SLois Curfman McInnes   while (count) {
424ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
4258965ea79SLois Curfman McInnes     /* unpack receives into our local space */
42613f74950SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
4272205254eSKarl Rupp 
4288965ea79SLois Curfman McInnes     source[imdex] = recv_status.MPI_SOURCE;
4298965ea79SLois Curfman McInnes     lens[imdex]   = n;
4308965ea79SLois Curfman McInnes     slen += n;
4318965ea79SLois Curfman McInnes     count--;
4328965ea79SLois Curfman McInnes   }
433606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
4348965ea79SLois Curfman McInnes 
4358965ea79SLois Curfman McInnes   /* move the data into the send scatter */
436854ce69bSBarry Smith   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
4378965ea79SLois Curfman McInnes   count = 0;
4388965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
4398965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
4408965ea79SLois Curfman McInnes     for (j=0; j<lens[i]; j++) {
4418965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
4428965ea79SLois Curfman McInnes     }
4438965ea79SLois Curfman McInnes   }
444606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
44574ed9c26SBarry Smith   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
446606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
44776ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
4488965ea79SLois Curfman McInnes 
44997b48c8fSBarry Smith   /* fix right hand side if needed */
45097b48c8fSBarry Smith   if (x && b) {
45197b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
45297b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
45397b48c8fSBarry Smith     for (i=0; i<slen; i++) {
45497b48c8fSBarry Smith       bb[lrows[i]] = diag*xx[lrows[i]];
45597b48c8fSBarry Smith     }
45697b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
45797b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
45897b48c8fSBarry Smith   }
45997b48c8fSBarry Smith 
4608965ea79SLois Curfman McInnes   /* actually zap the local rows */
461b9679d65SBarry Smith   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
462e2eb51b1SBarry Smith   if (diag != 0.0) {
463b9679d65SBarry Smith     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464b9679d65SBarry Smith     PetscInt     m   = ll->lda, i;
465b9679d65SBarry Smith 
466b9679d65SBarry Smith     for (i=0; i<slen; i++) {
467b9679d65SBarry Smith       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468b9679d65SBarry Smith     }
469b9679d65SBarry Smith   }
470606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4718965ea79SLois Curfman McInnes 
4728965ea79SLois Curfman McInnes   /* wait on sends */
4738965ea79SLois Curfman McInnes   if (nsends) {
474785e854fSJed Brown     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
475ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
476606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
4778965ea79SLois Curfman McInnes   }
478606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
479606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
4803a40ed3dSBarry Smith   PetscFunctionReturn(0);
4818965ea79SLois Curfman McInnes }
4828965ea79SLois Curfman McInnes 
483cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
487cc2e6a90SBarry Smith 
488dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4898965ea79SLois Curfman McInnes {
49039ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
491dfbe8321SBarry Smith   PetscErrorCode ierr;
492c456f294SBarry Smith 
4933a40ed3dSBarry Smith   PetscFunctionBegin;
494ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
495ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496cc2e6a90SBarry Smith   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4973a40ed3dSBarry Smith   PetscFunctionReturn(0);
4988965ea79SLois Curfman McInnes }
4998965ea79SLois Curfman McInnes 
500dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
5018965ea79SLois Curfman McInnes {
50239ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
503dfbe8321SBarry Smith   PetscErrorCode ierr;
504c456f294SBarry Smith 
5053a40ed3dSBarry Smith   PetscFunctionBegin;
506ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508cc2e6a90SBarry Smith   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
5093a40ed3dSBarry Smith   PetscFunctionReturn(0);
5108965ea79SLois Curfman McInnes }
5118965ea79SLois Curfman McInnes 
512dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513096963f5SLois Curfman McInnes {
514096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
515dfbe8321SBarry Smith   PetscErrorCode ierr;
51687828ca2SBarry Smith   PetscScalar    zero = 0.0;
517096963f5SLois Curfman McInnes 
5183a40ed3dSBarry Smith   PetscFunctionBegin;
5192dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
520cc2e6a90SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5233a40ed3dSBarry Smith   PetscFunctionReturn(0);
524096963f5SLois Curfman McInnes }
525096963f5SLois Curfman McInnes 
526dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527096963f5SLois Curfman McInnes {
528096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
529dfbe8321SBarry Smith   PetscErrorCode ierr;
530096963f5SLois Curfman McInnes 
5313a40ed3dSBarry Smith   PetscFunctionBegin;
5323501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
533cc2e6a90SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
534ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
535ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5363a40ed3dSBarry Smith   PetscFunctionReturn(0);
537096963f5SLois Curfman McInnes }
538096963f5SLois Curfman McInnes 
539dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
5408965ea79SLois Curfman McInnes {
54139ddd567SLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
542096963f5SLois Curfman McInnes   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
543dfbe8321SBarry Smith   PetscErrorCode ierr;
544d0f46423SBarry Smith   PetscInt       len,i,n,m = A->rmap->n,radd;
54587828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
546ed3cc1f0SBarry Smith 
5473a40ed3dSBarry Smith   PetscFunctionBegin;
5482dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
5491ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553d0f46423SBarry Smith   radd = A->rmap->rstart*m;
55444cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
555096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
556096963f5SLois Curfman McInnes   }
5571ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5583a40ed3dSBarry Smith   PetscFunctionReturn(0);
5598965ea79SLois Curfman McInnes }
5608965ea79SLois Curfman McInnes 
561dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5628965ea79SLois Curfman McInnes {
5633501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
564dfbe8321SBarry Smith   PetscErrorCode ierr;
565ed3cc1f0SBarry Smith 
5663a40ed3dSBarry Smith   PetscFunctionBegin;
567aa482453SBarry Smith #if defined(PETSC_USE_LOG)
568d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
5698965ea79SLois Curfman McInnes #endif
5708798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
5716bf464f9SBarry Smith   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
5726bf464f9SBarry Smith   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
5736bf464f9SBarry Smith   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
57401b82886SBarry Smith 
575bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
576dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
5778baccfbdSHong Zhang 
5788baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
5798572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
5808572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
5818572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
582d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
583d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
5848baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5858baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
5868baccfbdSHong Zhang #endif
587bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
588bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
589bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
590bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5918baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5928baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5938baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
59486aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
59586aefd0dSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
5963a40ed3dSBarry Smith   PetscFunctionReturn(0);
5978965ea79SLois Curfman McInnes }
59839ddd567SLois Curfman McInnes 
5996849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
6008965ea79SLois Curfman McInnes {
60139ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602dfbe8321SBarry Smith   PetscErrorCode    ierr;
603aa05aa95SBarry Smith   PetscViewerFormat format;
604aa05aa95SBarry Smith   int               fd;
605d0f46423SBarry Smith   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
606aa05aa95SBarry Smith   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
607578230a0SSatish Balay   PetscScalar       *work,*v,*vv;
608aa05aa95SBarry Smith   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
6097056b6fcSBarry Smith 
6103a40ed3dSBarry Smith   PetscFunctionBegin;
61139ddd567SLois Curfman McInnes   if (mdn->size == 1) {
61239ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613aa05aa95SBarry Smith   } else {
614aa05aa95SBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
615ce94432eSBarry Smith     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
616ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
617aa05aa95SBarry Smith 
618aa05aa95SBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619f4403165SShri Abhyankar     if (format == PETSC_VIEWER_NATIVE) {
620aa05aa95SBarry Smith 
621aa05aa95SBarry Smith       if (!rank) {
622aa05aa95SBarry Smith         /* store the matrix as a dense matrix */
6230700a824SBarry Smith         header[0] = MAT_FILE_CLASSID;
624d0f46423SBarry Smith         header[1] = mat->rmap->N;
625aa05aa95SBarry Smith         header[2] = N;
626aa05aa95SBarry Smith         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627aa05aa95SBarry Smith         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
628aa05aa95SBarry Smith 
629aa05aa95SBarry Smith         /* get largest work array needed for transposing array */
630d0f46423SBarry Smith         mmax = mat->rmap->n;
631aa05aa95SBarry Smith         for (i=1; i<size; i++) {
632d0f46423SBarry Smith           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
6338965ea79SLois Curfman McInnes         }
634785e854fSJed Brown         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
635aa05aa95SBarry Smith 
636aa05aa95SBarry Smith         /* write out local array, by rows */
637d0f46423SBarry Smith         m = mat->rmap->n;
638aa05aa95SBarry Smith         v = a->v;
639aa05aa95SBarry Smith         for (j=0; j<N; j++) {
640aa05aa95SBarry Smith           for (i=0; i<m; i++) {
641578230a0SSatish Balay             work[j + i*N] = *v++;
642aa05aa95SBarry Smith           }
643aa05aa95SBarry Smith         }
644aa05aa95SBarry Smith         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645aa05aa95SBarry Smith         /* get largest work array to receive messages from other processes, excludes process zero */
646aa05aa95SBarry Smith         mmax = 0;
647aa05aa95SBarry Smith         for (i=1; i<size; i++) {
648d0f46423SBarry Smith           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649aa05aa95SBarry Smith         }
650785e854fSJed Brown         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
651aa05aa95SBarry Smith         for (k = 1; k < size; k++) {
652f8009846SMatthew Knepley           v    = vv;
653d0f46423SBarry Smith           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654ce94432eSBarry Smith           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
655aa05aa95SBarry Smith 
656aa05aa95SBarry Smith           for (j = 0; j < N; j++) {
657aa05aa95SBarry Smith             for (i = 0; i < m; i++) {
658578230a0SSatish Balay               work[j + i*N] = *v++;
659aa05aa95SBarry Smith             }
660aa05aa95SBarry Smith           }
661aa05aa95SBarry Smith           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
662aa05aa95SBarry Smith         }
663aa05aa95SBarry Smith         ierr = PetscFree(work);CHKERRQ(ierr);
664578230a0SSatish Balay         ierr = PetscFree(vv);CHKERRQ(ierr);
665aa05aa95SBarry Smith       } else {
666ce94432eSBarry Smith         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
667aa05aa95SBarry Smith       }
6686a9046bcSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669aa05aa95SBarry Smith   }
6703a40ed3dSBarry Smith   PetscFunctionReturn(0);
6718965ea79SLois Curfman McInnes }
6728965ea79SLois Curfman McInnes 
6737da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
6749804daf3SBarry Smith #include <petscdraw.h>
6756849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
6768965ea79SLois Curfman McInnes {
67739ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
678dfbe8321SBarry Smith   PetscErrorCode    ierr;
6797da1fb6eSBarry Smith   PetscMPIInt       rank = mdn->rank;
68019fd82e9SBarry Smith   PetscViewerType   vtype;
681ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
682b0a32e0cSBarry Smith   PetscViewer       sviewer;
683f3ef73ceSBarry Smith   PetscViewerFormat format;
6848965ea79SLois Curfman McInnes 
6853a40ed3dSBarry Smith   PetscFunctionBegin;
686251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
687251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
68832077d6dSBarry Smith   if (iascii) {
689b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
690b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
691456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6924e220ebcSLois Curfman McInnes       MatInfo info;
693888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
6941575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
6957b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
696b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6971575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
6985d00a290SHong Zhang       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
6993a40ed3dSBarry Smith       PetscFunctionReturn(0);
700fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
7013a40ed3dSBarry Smith       PetscFunctionReturn(0);
7028965ea79SLois Curfman McInnes     }
703f1af5d2fSBarry Smith   } else if (isdraw) {
704b0a32e0cSBarry Smith     PetscDraw draw;
705ace3abfcSBarry Smith     PetscBool isnull;
706f1af5d2fSBarry Smith 
707b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
708b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
709f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
710f1af5d2fSBarry Smith   }
71177ed5343SBarry Smith 
7127da1fb6eSBarry Smith   {
7138965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
7148965ea79SLois Curfman McInnes     Mat         A;
715d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716ba8c8a56SBarry Smith     PetscInt    *cols;
717ba8c8a56SBarry Smith     PetscScalar *vals;
7188965ea79SLois Curfman McInnes 
719ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
7208965ea79SLois Curfman McInnes     if (!rank) {
721f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
7223a40ed3dSBarry Smith     } else {
723f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
7248965ea79SLois Curfman McInnes     }
7257adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
726878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
7270298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
7283bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
7298965ea79SLois Curfman McInnes 
73039ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
73139ddd567SLois Curfman McInnes        but it's quick for now */
73251022da4SBarry Smith     A->insertmode = INSERT_VALUES;
7332205254eSKarl Rupp 
7342205254eSKarl Rupp     row = mat->rmap->rstart;
7352205254eSKarl Rupp     m   = mdn->A->rmap->n;
7368965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
737ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
738ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
739ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
74039ddd567SLois Curfman McInnes       row++;
7418965ea79SLois Curfman McInnes     }
7428965ea79SLois Curfman McInnes 
7436d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7446d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7453f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
746b9b97703SBarry Smith     if (!rank) {
7471a9d3c3cSBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
7487da1fb6eSBarry Smith       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
7498965ea79SLois Curfman McInnes     }
7503f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
751b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
7526bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
7538965ea79SLois Curfman McInnes   }
7543a40ed3dSBarry Smith   PetscFunctionReturn(0);
7558965ea79SLois Curfman McInnes }
7568965ea79SLois Curfman McInnes 
757dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
7588965ea79SLois Curfman McInnes {
759dfbe8321SBarry Smith   PetscErrorCode ierr;
760ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw,issocket;
7618965ea79SLois Curfman McInnes 
762433994e6SBarry Smith   PetscFunctionBegin;
763251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
764251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
765251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
766251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
7670f5bd95cSBarry Smith 
76832077d6dSBarry Smith   if (iascii || issocket || isdraw) {
769f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
7700f5bd95cSBarry Smith   } else if (isbinary) {
7713a40ed3dSBarry Smith     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
77211aeaf0aSBarry Smith   }
7733a40ed3dSBarry Smith   PetscFunctionReturn(0);
7748965ea79SLois Curfman McInnes }
7758965ea79SLois Curfman McInnes 
776dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
7778965ea79SLois Curfman McInnes {
7783501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
7793501a2bdSLois Curfman McInnes   Mat            mdn  = mat->A;
780dfbe8321SBarry Smith   PetscErrorCode ierr;
781329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
7828965ea79SLois Curfman McInnes 
7833a40ed3dSBarry Smith   PetscFunctionBegin;
7844e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7852205254eSKarl Rupp 
7864e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
7872205254eSKarl Rupp 
7884e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
7894e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
7908965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7914e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7924e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7934e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7944e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7954e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7968965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
797b2566f29SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7982205254eSKarl Rupp 
7994e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8004e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8014e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8024e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8034e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8048965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
805b2566f29SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
8062205254eSKarl Rupp 
8074e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
8084e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
8094e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
8104e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
8114e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
8128965ea79SLois Curfman McInnes   }
8134e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
8144e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
8154e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
8163a40ed3dSBarry Smith   PetscFunctionReturn(0);
8178965ea79SLois Curfman McInnes }
8188965ea79SLois Curfman McInnes 
819ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
8208965ea79SLois Curfman McInnes {
82139ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
822dfbe8321SBarry Smith   PetscErrorCode ierr;
8238965ea79SLois Curfman McInnes 
8243a40ed3dSBarry Smith   PetscFunctionBegin;
82512c028f9SKris Buschelman   switch (op) {
826512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
82712c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
82812c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
82943674050SBarry Smith     MatCheckPreallocated(A,1);
8304e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
83112c028f9SKris Buschelman     break;
83212c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
83343674050SBarry Smith     MatCheckPreallocated(A,1);
8344e0d8c25SBarry Smith     a->roworiented = flg;
8354e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
83612c028f9SKris Buschelman     break;
8374e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
83813fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
83912c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
840290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
84112c028f9SKris Buschelman     break;
84212c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
8434e0d8c25SBarry Smith     a->donotstash = flg;
84412c028f9SKris Buschelman     break;
84577e54ba9SKris Buschelman   case MAT_SYMMETRIC:
84677e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
8479a4540c5SBarry Smith   case MAT_HERMITIAN:
8489a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
849600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
850290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
85177e54ba9SKris Buschelman     break;
85212c028f9SKris Buschelman   default:
853e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
8543a40ed3dSBarry Smith   }
8553a40ed3dSBarry Smith   PetscFunctionReturn(0);
8568965ea79SLois Curfman McInnes }
8578965ea79SLois Curfman McInnes 
8588965ea79SLois Curfman McInnes 
859dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
8605b2fa520SLois Curfman McInnes {
8615b2fa520SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
8625b2fa520SLois Curfman McInnes   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
863bca11509SBarry Smith   const PetscScalar *l,*r;
864bca11509SBarry Smith   PetscScalar       x,*v;
865dfbe8321SBarry Smith   PetscErrorCode    ierr;
866d0f46423SBarry Smith   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
8675b2fa520SLois Curfman McInnes 
8685b2fa520SLois Curfman McInnes   PetscFunctionBegin;
86972d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
8705b2fa520SLois Curfman McInnes   if (ll) {
87172d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
872e32f2f54SBarry Smith     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873bca11509SBarry Smith     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
8745b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
8755b2fa520SLois Curfman McInnes       x = l[i];
8765b2fa520SLois Curfman McInnes       v = mat->v + i;
8775b2fa520SLois Curfman McInnes       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
8785b2fa520SLois Curfman McInnes     }
879bca11509SBarry Smith     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
880efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8815b2fa520SLois Curfman McInnes   }
8825b2fa520SLois Curfman McInnes   if (rr) {
883175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
884e32f2f54SBarry Smith     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885ca9f406cSSatish Balay     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886ca9f406cSSatish Balay     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887bca11509SBarry Smith     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
8885b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
8895b2fa520SLois Curfman McInnes       x = r[i];
8905b2fa520SLois Curfman McInnes       v = mat->v + i*m;
8912205254eSKarl Rupp       for (j=0; j<m; j++) (*v++) *= x;
8925b2fa520SLois Curfman McInnes     }
893bca11509SBarry Smith     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
894efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8955b2fa520SLois Curfman McInnes   }
8965b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8975b2fa520SLois Curfman McInnes }
8985b2fa520SLois Curfman McInnes 
899dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900096963f5SLois Curfman McInnes {
9013501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
9023501a2bdSLois Curfman McInnes   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
903dfbe8321SBarry Smith   PetscErrorCode ierr;
90413f74950SBarry Smith   PetscInt       i,j;
905329f5518SBarry Smith   PetscReal      sum = 0.0;
90687828ca2SBarry Smith   PetscScalar    *v  = mat->v;
9073501a2bdSLois Curfman McInnes 
9083a40ed3dSBarry Smith   PetscFunctionBegin;
9093501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
910064f8208SBarry Smith     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
9113501a2bdSLois Curfman McInnes   } else {
9123501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
913d0f46423SBarry Smith       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
9153501a2bdSLois Curfman McInnes       }
916b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
9178f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
918dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
9193a40ed3dSBarry Smith     } else if (type == NORM_1) {
920329f5518SBarry Smith       PetscReal *tmp,*tmp2;
921dcca6d9dSJed Brown       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
92274ed9c26SBarry Smith       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
92374ed9c26SBarry Smith       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924064f8208SBarry Smith       *nrm = 0.0;
9253501a2bdSLois Curfman McInnes       v    = mat->v;
926d0f46423SBarry Smith       for (j=0; j<mdn->A->cmap->n; j++) {
927d0f46423SBarry Smith         for (i=0; i<mdn->A->rmap->n; i++) {
92867e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
9293501a2bdSLois Curfman McInnes         }
9303501a2bdSLois Curfman McInnes       }
931b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932d0f46423SBarry Smith       for (j=0; j<A->cmap->N; j++) {
933064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
9343501a2bdSLois Curfman McInnes       }
9358627564fSBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
936d0f46423SBarry Smith       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
9373a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
938329f5518SBarry Smith       PetscReal ntemp;
9393501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
940b2566f29SBarry Smith       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
9423501a2bdSLois Curfman McInnes   }
9433a40ed3dSBarry Smith   PetscFunctionReturn(0);
9443501a2bdSLois Curfman McInnes }
9453501a2bdSLois Curfman McInnes 
946fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
9473501a2bdSLois Curfman McInnes {
9483501a2bdSLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
9493501a2bdSLois Curfman McInnes   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
9503501a2bdSLois Curfman McInnes   Mat            B;
951d0f46423SBarry Smith   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
9526849ba73SBarry Smith   PetscErrorCode ierr;
95313f74950SBarry Smith   PetscInt       j,i;
95487828ca2SBarry Smith   PetscScalar    *v;
9553501a2bdSLois Curfman McInnes 
9563a40ed3dSBarry Smith   PetscFunctionBegin;
957cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
959d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
9607adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
9610298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
962fc4dec0aSBarry Smith   } else {
963fc4dec0aSBarry Smith     B = *matout;
964fc4dec0aSBarry Smith   }
9653501a2bdSLois Curfman McInnes 
966d0f46423SBarry Smith   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967785e854fSJed Brown   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
9683501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
9691acff37aSSatish Balay   for (j=0; j<n; j++) {
9703501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
9713501a2bdSLois Curfman McInnes     v   += m;
9723501a2bdSLois Curfman McInnes   }
973606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
9746d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9756d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9773501a2bdSLois Curfman McInnes     *matout = B;
9783501a2bdSLois Curfman McInnes   } else {
97928be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
9803501a2bdSLois Curfman McInnes   }
9813a40ed3dSBarry Smith   PetscFunctionReturn(0);
982096963f5SLois Curfman McInnes }
983096963f5SLois Curfman McInnes 
98444cd7ae7SLois Curfman McInnes 
9856849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986d84338a6SBarry Smith extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
9878965ea79SLois Curfman McInnes 
9884994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A)
989273d9f13SBarry Smith {
990dfbe8321SBarry Smith   PetscErrorCode ierr;
991273d9f13SBarry Smith 
992273d9f13SBarry Smith   PetscFunctionBegin;
993273d9f13SBarry Smith   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
994273d9f13SBarry Smith   PetscFunctionReturn(0);
995273d9f13SBarry Smith }
996273d9f13SBarry Smith 
997488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998488007eeSBarry Smith {
999488007eeSBarry Smith   PetscErrorCode ierr;
1000488007eeSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001488007eeSBarry Smith 
1002488007eeSBarry Smith   PetscFunctionBegin;
1003488007eeSBarry Smith   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004a3fa217bSJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005488007eeSBarry Smith   PetscFunctionReturn(0);
1006488007eeSBarry Smith }
1007488007eeSBarry Smith 
10087087cfbeSBarry Smith PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009ba337c44SJed Brown {
1010ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1011ba337c44SJed Brown   PetscErrorCode ierr;
1012ba337c44SJed Brown 
1013ba337c44SJed Brown   PetscFunctionBegin;
1014ba337c44SJed Brown   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1015ba337c44SJed Brown   PetscFunctionReturn(0);
1016ba337c44SJed Brown }
1017ba337c44SJed Brown 
1018ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A)
1019ba337c44SJed Brown {
1020ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021ba337c44SJed Brown   PetscErrorCode ierr;
1022ba337c44SJed Brown 
1023ba337c44SJed Brown   PetscFunctionBegin;
1024ba337c44SJed Brown   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1025ba337c44SJed Brown   PetscFunctionReturn(0);
1026ba337c44SJed Brown }
1027ba337c44SJed Brown 
1028ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029ba337c44SJed Brown {
1030ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031ba337c44SJed Brown   PetscErrorCode ierr;
1032ba337c44SJed Brown 
1033ba337c44SJed Brown   PetscFunctionBegin;
1034ba337c44SJed Brown   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1035ba337c44SJed Brown   PetscFunctionReturn(0);
1036ba337c44SJed Brown }
1037ba337c44SJed Brown 
10380716a85fSBarry Smith extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
10390716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
10400716a85fSBarry Smith {
10410716a85fSBarry Smith   PetscErrorCode ierr;
10420716a85fSBarry Smith   PetscInt       i,n;
10430716a85fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
10440716a85fSBarry Smith   PetscReal      *work;
10450716a85fSBarry Smith 
10460716a85fSBarry Smith   PetscFunctionBegin;
10470298fd71SBarry Smith   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1048785e854fSJed Brown   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
10490716a85fSBarry Smith   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
10500716a85fSBarry Smith   if (type == NORM_2) {
10510716a85fSBarry Smith     for (i=0; i<n; i++) work[i] *= work[i];
10520716a85fSBarry Smith   }
10530716a85fSBarry Smith   if (type == NORM_INFINITY) {
1054b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
10550716a85fSBarry Smith   } else {
1056b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
10570716a85fSBarry Smith   }
10580716a85fSBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
10590716a85fSBarry Smith   if (type == NORM_2) {
10608f1a2a5eSBarry Smith     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
10610716a85fSBarry Smith   }
10620716a85fSBarry Smith   PetscFunctionReturn(0);
10630716a85fSBarry Smith }
10640716a85fSBarry Smith 
106573a71a0fSBarry Smith static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
106673a71a0fSBarry Smith {
106773a71a0fSBarry Smith   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
106873a71a0fSBarry Smith   PetscErrorCode ierr;
106973a71a0fSBarry Smith   PetscScalar    *a;
107073a71a0fSBarry Smith   PetscInt       m,n,i;
107173a71a0fSBarry Smith 
107273a71a0fSBarry Smith   PetscFunctionBegin;
107373a71a0fSBarry Smith   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
10748c778c55SBarry Smith   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
107573a71a0fSBarry Smith   for (i=0; i<m*n; i++) {
107673a71a0fSBarry Smith     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
107773a71a0fSBarry Smith   }
10788c778c55SBarry Smith   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
107973a71a0fSBarry Smith   PetscFunctionReturn(0);
108073a71a0fSBarry Smith }
108173a71a0fSBarry Smith 
1082fd4e9aacSBarry Smith extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1083fd4e9aacSBarry Smith 
10843b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
10853b49f96aSBarry Smith {
10863b49f96aSBarry Smith   PetscFunctionBegin;
10873b49f96aSBarry Smith   *missing = PETSC_FALSE;
10883b49f96aSBarry Smith   PetscFunctionReturn(0);
10893b49f96aSBarry Smith }
10903b49f96aSBarry Smith 
1091*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1092*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1093*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1094*cc48ffa7SToby Isaac 
10958965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
109609dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
109709dc0095SBarry Smith                                         MatGetRow_MPIDense,
109809dc0095SBarry Smith                                         MatRestoreRow_MPIDense,
109909dc0095SBarry Smith                                         MatMult_MPIDense,
110097304618SKris Buschelman                                 /*  4*/ MatMultAdd_MPIDense,
11017c922b88SBarry Smith                                         MatMultTranspose_MPIDense,
11027c922b88SBarry Smith                                         MatMultTransposeAdd_MPIDense,
11038965ea79SLois Curfman McInnes                                         0,
110409dc0095SBarry Smith                                         0,
110509dc0095SBarry Smith                                         0,
110697304618SKris Buschelman                                 /* 10*/ 0,
110709dc0095SBarry Smith                                         0,
110809dc0095SBarry Smith                                         0,
110909dc0095SBarry Smith                                         0,
111009dc0095SBarry Smith                                         MatTranspose_MPIDense,
111197304618SKris Buschelman                                 /* 15*/ MatGetInfo_MPIDense,
11126e4ee0c6SHong Zhang                                         MatEqual_MPIDense,
111309dc0095SBarry Smith                                         MatGetDiagonal_MPIDense,
11145b2fa520SLois Curfman McInnes                                         MatDiagonalScale_MPIDense,
111509dc0095SBarry Smith                                         MatNorm_MPIDense,
111697304618SKris Buschelman                                 /* 20*/ MatAssemblyBegin_MPIDense,
111709dc0095SBarry Smith                                         MatAssemblyEnd_MPIDense,
111809dc0095SBarry Smith                                         MatSetOption_MPIDense,
111909dc0095SBarry Smith                                         MatZeroEntries_MPIDense,
1120d519adbfSMatthew Knepley                                 /* 24*/ MatZeroRows_MPIDense,
1121919b68f7SBarry Smith                                         0,
112201b82886SBarry Smith                                         0,
112301b82886SBarry Smith                                         0,
112401b82886SBarry Smith                                         0,
11254994cf47SJed Brown                                 /* 29*/ MatSetUp_MPIDense,
1126273d9f13SBarry Smith                                         0,
112709dc0095SBarry Smith                                         0,
1128c56a70eeSBarry Smith                                         MatGetDiagonalBlock_MPIDense,
11298c778c55SBarry Smith                                         0,
1130d519adbfSMatthew Knepley                                 /* 34*/ MatDuplicate_MPIDense,
113109dc0095SBarry Smith                                         0,
113209dc0095SBarry Smith                                         0,
113309dc0095SBarry Smith                                         0,
113409dc0095SBarry Smith                                         0,
1135d519adbfSMatthew Knepley                                 /* 39*/ MatAXPY_MPIDense,
11367dae84e0SHong Zhang                                         MatCreateSubMatrices_MPIDense,
113709dc0095SBarry Smith                                         0,
113809dc0095SBarry Smith                                         MatGetValues_MPIDense,
113909dc0095SBarry Smith                                         0,
1140d519adbfSMatthew Knepley                                 /* 44*/ 0,
114109dc0095SBarry Smith                                         MatScale_MPIDense,
11427d68702bSBarry Smith                                         MatShift_Basic,
114309dc0095SBarry Smith                                         0,
114409dc0095SBarry Smith                                         0,
114573a71a0fSBarry Smith                                 /* 49*/ MatSetRandom_MPIDense,
114609dc0095SBarry Smith                                         0,
114709dc0095SBarry Smith                                         0,
114809dc0095SBarry Smith                                         0,
114909dc0095SBarry Smith                                         0,
1150d519adbfSMatthew Knepley                                 /* 54*/ 0,
115109dc0095SBarry Smith                                         0,
115209dc0095SBarry Smith                                         0,
115309dc0095SBarry Smith                                         0,
115409dc0095SBarry Smith                                         0,
11557dae84e0SHong Zhang                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1156b9b97703SBarry Smith                                         MatDestroy_MPIDense,
1157b9b97703SBarry Smith                                         MatView_MPIDense,
1158357abbc8SBarry Smith                                         0,
115997304618SKris Buschelman                                         0,
1160d519adbfSMatthew Knepley                                 /* 64*/ 0,
116197304618SKris Buschelman                                         0,
116297304618SKris Buschelman                                         0,
116397304618SKris Buschelman                                         0,
116497304618SKris Buschelman                                         0,
1165d519adbfSMatthew Knepley                                 /* 69*/ 0,
116697304618SKris Buschelman                                         0,
116797304618SKris Buschelman                                         0,
116897304618SKris Buschelman                                         0,
116997304618SKris Buschelman                                         0,
1170d519adbfSMatthew Knepley                                 /* 74*/ 0,
117197304618SKris Buschelman                                         0,
117297304618SKris Buschelman                                         0,
117397304618SKris Buschelman                                         0,
117497304618SKris Buschelman                                         0,
1175d519adbfSMatthew Knepley                                 /* 79*/ 0,
117697304618SKris Buschelman                                         0,
117797304618SKris Buschelman                                         0,
117897304618SKris Buschelman                                         0,
11795bba2384SShri Abhyankar                                 /* 83*/ MatLoad_MPIDense,
1180865e5f61SKris Buschelman                                         0,
1181865e5f61SKris Buschelman                                         0,
1182865e5f61SKris Buschelman                                         0,
1183865e5f61SKris Buschelman                                         0,
1184865e5f61SKris Buschelman                                         0,
11859775878aSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1186320f2790SHong Zhang                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1187320f2790SHong Zhang                                         MatMatMultSymbolic_MPIDense_MPIDense,
11889775878aSHong Zhang #else
11899775878aSHong Zhang                                 /* 89*/ 0,
1190865e5f61SKris Buschelman                                         0,
11919775878aSHong Zhang #endif
1192fd4e9aacSBarry Smith                                         MatMatMultNumeric_MPIDense,
11932fbe02b9SBarry Smith                                         0,
1194ba337c44SJed Brown                                         0,
1195d519adbfSMatthew Knepley                                 /* 94*/ 0,
1196*cc48ffa7SToby Isaac                                         MatMatTransposeMult_MPIDense_MPIDense,
1197*cc48ffa7SToby Isaac                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1198*cc48ffa7SToby Isaac                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1199ba337c44SJed Brown                                         0,
1200ba337c44SJed Brown                                 /* 99*/ 0,
1201ba337c44SJed Brown                                         0,
1202ba337c44SJed Brown                                         0,
1203ba337c44SJed Brown                                         MatConjugate_MPIDense,
1204ba337c44SJed Brown                                         0,
1205ba337c44SJed Brown                                 /*104*/ 0,
1206ba337c44SJed Brown                                         MatRealPart_MPIDense,
1207ba337c44SJed Brown                                         MatImaginaryPart_MPIDense,
120886d161a7SShri Abhyankar                                         0,
120986d161a7SShri Abhyankar                                         0,
121086d161a7SShri Abhyankar                                 /*109*/ 0,
121186d161a7SShri Abhyankar                                         0,
121286d161a7SShri Abhyankar                                         0,
121386d161a7SShri Abhyankar                                         0,
12143b49f96aSBarry Smith                                         MatMissingDiagonal_MPIDense,
121586d161a7SShri Abhyankar                                 /*114*/ 0,
121686d161a7SShri Abhyankar                                         0,
121786d161a7SShri Abhyankar                                         0,
121886d161a7SShri Abhyankar                                         0,
121986d161a7SShri Abhyankar                                         0,
122086d161a7SShri Abhyankar                                 /*119*/ 0,
122186d161a7SShri Abhyankar                                         0,
122286d161a7SShri Abhyankar                                         0,
12230716a85fSBarry Smith                                         0,
12240716a85fSBarry Smith                                         0,
12250716a85fSBarry Smith                                 /*124*/ 0,
12263964eb88SJed Brown                                         MatGetColumnNorms_MPIDense,
12273964eb88SJed Brown                                         0,
12283964eb88SJed Brown                                         0,
12293964eb88SJed Brown                                         0,
12303964eb88SJed Brown                                 /*129*/ 0,
1231cb20be35SHong Zhang                                         MatTransposeMatMult_MPIDense_MPIDense,
1232cb20be35SHong Zhang                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1233cb20be35SHong Zhang                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
12343964eb88SJed Brown                                         0,
12353964eb88SJed Brown                                 /*134*/ 0,
12363964eb88SJed Brown                                         0,
12373964eb88SJed Brown                                         0,
12383964eb88SJed Brown                                         0,
12393964eb88SJed Brown                                         0,
12403964eb88SJed Brown                                 /*139*/ 0,
1241f9426fe0SMark Adams                                         0,
124294e2cb23SJakub Kruzik                                         0,
124394e2cb23SJakub Kruzik                                         0,
124494e2cb23SJakub Kruzik                                         0,
12451ae5eee8SJakub Kruzik                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1246ba337c44SJed Brown };
12478965ea79SLois Curfman McInnes 
12487087cfbeSBarry Smith PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1249a23d5eceSKris Buschelman {
1250a23d5eceSKris Buschelman   Mat_MPIDense   *a;
1251dfbe8321SBarry Smith   PetscErrorCode ierr;
1252a23d5eceSKris Buschelman 
1253a23d5eceSKris Buschelman   PetscFunctionBegin;
1254430ada49SBarry Smith   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1255a23d5eceSKris Buschelman   mat->preallocated = PETSC_TRUE;
1256a23d5eceSKris Buschelman   /* Note:  For now, when data is specified above, this assumes the user correctly
1257a23d5eceSKris Buschelman    allocates the local dense storage space.  We should add error checking. */
1258a23d5eceSKris Buschelman 
1259a23d5eceSKris Buschelman   a       = (Mat_MPIDense*)mat->data;
126034ef9618SShri Abhyankar   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
126134ef9618SShri Abhyankar   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
126234ef9618SShri Abhyankar   a->nvec = mat->cmap->n;
126334ef9618SShri Abhyankar 
1264f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1265d0f46423SBarry Smith   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
12665c5985e7SKris Buschelman   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
12675c5985e7SKris Buschelman   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
12683bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1269a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1270a23d5eceSKris Buschelman }
1271a23d5eceSKris Buschelman 
127265b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1273cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
12748baccfbdSHong Zhang {
12758ea901baSHong Zhang   Mat            mat_elemental;
12768ea901baSHong Zhang   PetscErrorCode ierr;
127732d7a744SHong Zhang   PetscScalar    *v;
127832d7a744SHong Zhang   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
12798ea901baSHong Zhang 
12808baccfbdSHong Zhang   PetscFunctionBegin;
1281378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1282378336b6SHong Zhang     mat_elemental = *newmat;
1283378336b6SHong Zhang     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1284378336b6SHong Zhang   } else {
1285378336b6SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1286378336b6SHong Zhang     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1287378336b6SHong Zhang     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1288378336b6SHong Zhang     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
128932d7a744SHong Zhang     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1290378336b6SHong Zhang   }
1291378336b6SHong Zhang 
129232d7a744SHong Zhang   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
129332d7a744SHong Zhang   for (i=0; i<N; i++) cols[i] = i;
129432d7a744SHong Zhang   for (i=0; i<m; i++) rows[i] = rstart + i;
12958ea901baSHong Zhang 
12968ea901baSHong Zhang   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
129732d7a744SHong Zhang   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
129832d7a744SHong Zhang   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
12998ea901baSHong Zhang   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
13008ea901baSHong Zhang   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
130132d7a744SHong Zhang   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
130232d7a744SHong Zhang   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
13038ea901baSHong Zhang 
1304511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
130528be2f97SBarry Smith     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
13068ea901baSHong Zhang   } else {
13078ea901baSHong Zhang     *newmat = mat_elemental;
13088ea901baSHong Zhang   }
13098baccfbdSHong Zhang   PetscFunctionReturn(0);
13108baccfbdSHong Zhang }
131165b80a83SHong Zhang #endif
13128baccfbdSHong Zhang 
1313af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
131486aefd0dSHong Zhang {
131586aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
131686aefd0dSHong Zhang   PetscErrorCode ierr;
131786aefd0dSHong Zhang 
131886aefd0dSHong Zhang   PetscFunctionBegin;
131986aefd0dSHong Zhang   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
132086aefd0dSHong Zhang   PetscFunctionReturn(0);
132186aefd0dSHong Zhang }
132286aefd0dSHong Zhang 
1323af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
132486aefd0dSHong Zhang {
132586aefd0dSHong Zhang   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
132686aefd0dSHong Zhang   PetscErrorCode ierr;
132786aefd0dSHong Zhang 
132886aefd0dSHong Zhang   PetscFunctionBegin;
132986aefd0dSHong Zhang   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
133086aefd0dSHong Zhang   PetscFunctionReturn(0);
133186aefd0dSHong Zhang }
133286aefd0dSHong Zhang 
133394e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
133494e2cb23SJakub Kruzik {
133594e2cb23SJakub Kruzik   PetscErrorCode ierr;
133694e2cb23SJakub Kruzik   Mat_MPIDense   *mat;
133794e2cb23SJakub Kruzik   PetscInt       m,nloc,N;
133894e2cb23SJakub Kruzik 
133994e2cb23SJakub Kruzik   PetscFunctionBegin;
134094e2cb23SJakub Kruzik   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
134194e2cb23SJakub Kruzik   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
134294e2cb23SJakub Kruzik   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
134394e2cb23SJakub Kruzik     PetscInt sum;
134494e2cb23SJakub Kruzik 
134594e2cb23SJakub Kruzik     if (n == PETSC_DECIDE) {
134694e2cb23SJakub Kruzik       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
134794e2cb23SJakub Kruzik     }
134894e2cb23SJakub Kruzik     /* Check sum(n) = N */
134994e2cb23SJakub Kruzik     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
135094e2cb23SJakub Kruzik     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
135194e2cb23SJakub Kruzik 
135294e2cb23SJakub Kruzik     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
135394e2cb23SJakub Kruzik   }
135494e2cb23SJakub Kruzik 
135594e2cb23SJakub Kruzik   /* numeric phase */
135694e2cb23SJakub Kruzik   mat = (Mat_MPIDense*)(*outmat)->data;
135794e2cb23SJakub Kruzik   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
135894e2cb23SJakub Kruzik   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
135994e2cb23SJakub Kruzik   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
136094e2cb23SJakub Kruzik   PetscFunctionReturn(0);
136194e2cb23SJakub Kruzik }
136294e2cb23SJakub Kruzik 
13638cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1364273d9f13SBarry Smith {
1365273d9f13SBarry Smith   Mat_MPIDense   *a;
1366dfbe8321SBarry Smith   PetscErrorCode ierr;
1367273d9f13SBarry Smith 
1368273d9f13SBarry Smith   PetscFunctionBegin;
1369b00a9115SJed Brown   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1370b0a32e0cSBarry Smith   mat->data = (void*)a;
1371273d9f13SBarry Smith   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1372273d9f13SBarry Smith 
1373273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1374ce94432eSBarry Smith   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1375ce94432eSBarry Smith   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1376273d9f13SBarry Smith 
1377273d9f13SBarry Smith   /* build cache for off array entries formed */
1378273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
13792205254eSKarl Rupp 
1380ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1381273d9f13SBarry Smith 
1382273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1383273d9f13SBarry Smith   a->lvec        = 0;
1384273d9f13SBarry Smith   a->Mvctx       = 0;
1385273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1386273d9f13SBarry Smith 
1387bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
13888572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
13898572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
13908572280aSBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1391d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1392d3042a70SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
13938baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
13948baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
13958baccfbdSHong Zhang #endif
1396bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1397bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
14008949adfdSHong Zhang 
14018949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
14028949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
14038949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1404af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1405af53bab2SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
140638aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1407273d9f13SBarry Smith   PetscFunctionReturn(0);
1408273d9f13SBarry Smith }
1409273d9f13SBarry Smith 
1410209238afSKris Buschelman /*MC
1411002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1412209238afSKris Buschelman 
1413209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1414209238afSKris Buschelman    and MATMPIDENSE otherwise.
1415209238afSKris Buschelman 
1416209238afSKris Buschelman    Options Database Keys:
1417209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1418209238afSKris Buschelman 
1419209238afSKris Buschelman   Level: beginner
1420209238afSKris Buschelman 
142101b82886SBarry Smith 
1422209238afSKris Buschelman .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1423209238afSKris Buschelman M*/
1424209238afSKris Buschelman 
1425273d9f13SBarry Smith /*@C
1426273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1427273d9f13SBarry Smith 
1428273d9f13SBarry Smith    Not collective
1429273d9f13SBarry Smith 
1430273d9f13SBarry Smith    Input Parameters:
14311c4f3114SJed Brown .  B - the matrix
14320298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1433273d9f13SBarry Smith    to control all matrix memory allocation.
1434273d9f13SBarry Smith 
1435273d9f13SBarry Smith    Notes:
1436273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1437273d9f13SBarry Smith    storage by columns.
1438273d9f13SBarry Smith 
1439273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1440273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
14410298fd71SBarry Smith    set data=NULL.
1442273d9f13SBarry Smith 
1443273d9f13SBarry Smith    Level: intermediate
1444273d9f13SBarry Smith 
1445273d9f13SBarry Smith .keywords: matrix,dense, parallel
1446273d9f13SBarry Smith 
1447273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1448273d9f13SBarry Smith @*/
14491c4f3114SJed Brown PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1450273d9f13SBarry Smith {
14514ac538c5SBarry Smith   PetscErrorCode ierr;
1452273d9f13SBarry Smith 
1453273d9f13SBarry Smith   PetscFunctionBegin;
14541c4f3114SJed Brown   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1455273d9f13SBarry Smith   PetscFunctionReturn(0);
1456273d9f13SBarry Smith }
1457273d9f13SBarry Smith 
1458d3042a70SBarry Smith /*@
1459d3042a70SBarry Smith    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1460d3042a70SBarry Smith    array provided by the user. This is useful to avoid copying an array
1461d3042a70SBarry Smith    into a matrix
1462d3042a70SBarry Smith 
1463d3042a70SBarry Smith    Not Collective
1464d3042a70SBarry Smith 
1465d3042a70SBarry Smith    Input Parameters:
1466d3042a70SBarry Smith +  mat - the matrix
1467d3042a70SBarry Smith -  array - the array in column major order
1468d3042a70SBarry Smith 
1469d3042a70SBarry Smith    Notes:
1470d3042a70SBarry Smith    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1471d3042a70SBarry Smith    freed when the matrix is destroyed.
1472d3042a70SBarry Smith 
1473d3042a70SBarry Smith    Level: developer
1474d3042a70SBarry Smith 
1475d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1476d3042a70SBarry Smith 
1477d3042a70SBarry Smith @*/
1478d3042a70SBarry Smith PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1479d3042a70SBarry Smith {
1480d3042a70SBarry Smith   PetscErrorCode ierr;
1481d3042a70SBarry Smith   PetscFunctionBegin;
1482d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1483d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1484d3042a70SBarry Smith   PetscFunctionReturn(0);
1485d3042a70SBarry Smith }
1486d3042a70SBarry Smith 
1487d3042a70SBarry Smith /*@
1488d3042a70SBarry Smith    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1489d3042a70SBarry Smith 
1490d3042a70SBarry Smith    Not Collective
1491d3042a70SBarry Smith 
1492d3042a70SBarry Smith    Input Parameters:
1493d3042a70SBarry Smith .  mat - the matrix
1494d3042a70SBarry Smith 
1495d3042a70SBarry Smith    Notes:
1496d3042a70SBarry Smith    You can only call this after a call to MatDensePlaceArray()
1497d3042a70SBarry Smith 
1498d3042a70SBarry Smith    Level: developer
1499d3042a70SBarry Smith 
1500d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1501d3042a70SBarry Smith 
1502d3042a70SBarry Smith @*/
1503d3042a70SBarry Smith PetscErrorCode  MatDenseResetArray(Mat mat)
1504d3042a70SBarry Smith {
1505d3042a70SBarry Smith   PetscErrorCode ierr;
1506d3042a70SBarry Smith   PetscFunctionBegin;
1507d3042a70SBarry Smith   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1508d3042a70SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1509d3042a70SBarry Smith   PetscFunctionReturn(0);
1510d3042a70SBarry Smith }
1511d3042a70SBarry Smith 
15128965ea79SLois Curfman McInnes /*@C
151369b1f4b7SBarry Smith    MatCreateDense - Creates a parallel matrix in dense format.
15148965ea79SLois Curfman McInnes 
1515db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1516db81eaa0SLois Curfman McInnes 
15178965ea79SLois Curfman McInnes    Input Parameters:
1518db81eaa0SLois Curfman McInnes +  comm - MPI communicator
15198965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1520db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
15218965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1522db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
15236cfe35ddSJose E. Roman -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1524dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
15258965ea79SLois Curfman McInnes 
15268965ea79SLois Curfman McInnes    Output Parameter:
1527477f1c0bSLois Curfman McInnes .  A - the matrix
15288965ea79SLois Curfman McInnes 
1529b259b22eSLois Curfman McInnes    Notes:
153039ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
153139ddd567SLois Curfman McInnes    storage by columns.
15328965ea79SLois Curfman McInnes 
153318f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
153418f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
15356cfe35ddSJose E. Roman    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
153618f449edSLois Curfman McInnes 
15378965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
15388965ea79SLois Curfman McInnes    (possibly both).
15398965ea79SLois Curfman McInnes 
1540027ccd11SLois Curfman McInnes    Level: intermediate
1541027ccd11SLois Curfman McInnes 
154239ddd567SLois Curfman McInnes .keywords: matrix,dense, parallel
15438965ea79SLois Curfman McInnes 
154439ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
15458965ea79SLois Curfman McInnes @*/
154669b1f4b7SBarry Smith PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
15478965ea79SLois Curfman McInnes {
15486849ba73SBarry Smith   PetscErrorCode ierr;
154913f74950SBarry Smith   PetscMPIInt    size;
15508965ea79SLois Curfman McInnes 
15513a40ed3dSBarry Smith   PetscFunctionBegin;
1552f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1553f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1554273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1555273d9f13SBarry Smith   if (size > 1) {
1556273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1557273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
15586cfe35ddSJose E. Roman     if (data) {  /* user provided data array, so no need to assemble */
15596cfe35ddSJose E. Roman       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
15606cfe35ddSJose E. Roman       (*A)->assembled = PETSC_TRUE;
15616cfe35ddSJose E. Roman     }
1562273d9f13SBarry Smith   } else {
1563273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1564273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
15658c469469SLois Curfman McInnes   }
15663a40ed3dSBarry Smith   PetscFunctionReturn(0);
15678965ea79SLois Curfman McInnes }
15688965ea79SLois Curfman McInnes 
15696849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
15708965ea79SLois Curfman McInnes {
15718965ea79SLois Curfman McInnes   Mat            mat;
15723501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1573dfbe8321SBarry Smith   PetscErrorCode ierr;
15748965ea79SLois Curfman McInnes 
15753a40ed3dSBarry Smith   PetscFunctionBegin;
15768965ea79SLois Curfman McInnes   *newmat = 0;
1577ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1578d0f46423SBarry Smith   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
15797adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1580834f8fabSBarry Smith   a       = (Mat_MPIDense*)mat->data;
15815aa7edbeSHong Zhang 
1582d5f3da31SBarry Smith   mat->factortype   = A->factortype;
1583c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1584273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
15858965ea79SLois Curfman McInnes 
15868965ea79SLois Curfman McInnes   a->size         = oldmat->size;
15878965ea79SLois Curfman McInnes   a->rank         = oldmat->rank;
1588e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
1589b9b97703SBarry Smith   a->nvec         = oldmat->nvec;
15903782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
1591e04c1aa4SHong Zhang 
15921e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
15931e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
15948965ea79SLois Curfman McInnes 
1595329f5518SBarry Smith   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
15965609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
15973bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
159801b82886SBarry Smith 
15998965ea79SLois Curfman McInnes   *newmat = mat;
16003a40ed3dSBarry Smith   PetscFunctionReturn(0);
16018965ea79SLois Curfman McInnes }
16028965ea79SLois Curfman McInnes 
16039d36ed5fSBarry Smith PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
160486d161a7SShri Abhyankar {
160586d161a7SShri Abhyankar   PetscErrorCode ierr;
160686d161a7SShri Abhyankar   PetscMPIInt    rank,size;
160774c13d95SBarry Smith   const PetscInt *rowners;
160874c13d95SBarry Smith   PetscInt       i,m,n,nz,j,mMax;
160986d161a7SShri Abhyankar   PetscScalar    *array,*vals,*vals_ptr;
16109d36ed5fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
161186d161a7SShri Abhyankar 
161286d161a7SShri Abhyankar   PetscFunctionBegin;
161386d161a7SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
161486d161a7SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
161586d161a7SShri Abhyankar 
161674c13d95SBarry Smith   /* determine ownership of rows and columns */
161774c13d95SBarry Smith   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
161874c13d95SBarry Smith   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
161986d161a7SShri Abhyankar 
162074c13d95SBarry Smith   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1621430ada49SBarry Smith   if (!a->A) {
16220298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
16239d36ed5fSBarry Smith   }
16248c778c55SBarry Smith   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
162549467e7dSBarry Smith   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
162674c13d95SBarry Smith   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1627396e826eSBarry Smith   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
162886d161a7SShri Abhyankar   if (!rank) {
16299d36ed5fSBarry Smith     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
163086d161a7SShri Abhyankar 
163186d161a7SShri Abhyankar     /* read in my part of the matrix numerical values  */
163286d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
163386d161a7SShri Abhyankar 
163486d161a7SShri Abhyankar     /* insert into matrix-by row (this is why cannot directly read into array */
163586d161a7SShri Abhyankar     vals_ptr = vals;
163686d161a7SShri Abhyankar     for (i=0; i<m; i++) {
163786d161a7SShri Abhyankar       for (j=0; j<N; j++) {
163886d161a7SShri Abhyankar         array[i + j*m] = *vals_ptr++;
163986d161a7SShri Abhyankar       }
164086d161a7SShri Abhyankar     }
164186d161a7SShri Abhyankar 
164286d161a7SShri Abhyankar     /* read in other processors and ship out */
164386d161a7SShri Abhyankar     for (i=1; i<size; i++) {
164486d161a7SShri Abhyankar       nz   = (rowners[i+1] - rowners[i])*N;
164586d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1646a25532f0SBarry Smith       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
164786d161a7SShri Abhyankar     }
164886d161a7SShri Abhyankar   } else {
164986d161a7SShri Abhyankar     /* receive numeric values */
1650785e854fSJed Brown     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
165186d161a7SShri Abhyankar 
165286d161a7SShri Abhyankar     /* receive message of values*/
1653a25532f0SBarry Smith     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
165486d161a7SShri Abhyankar 
165586d161a7SShri Abhyankar     /* insert into matrix-by row (this is why cannot directly read into array */
165686d161a7SShri Abhyankar     vals_ptr = vals;
165786d161a7SShri Abhyankar     for (i=0; i<m; i++) {
165886d161a7SShri Abhyankar       for (j=0; j<N; j++) {
165986d161a7SShri Abhyankar         array[i + j*m] = *vals_ptr++;
166086d161a7SShri Abhyankar       }
166186d161a7SShri Abhyankar     }
166286d161a7SShri Abhyankar   }
16638c778c55SBarry Smith   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
166486d161a7SShri Abhyankar   ierr = PetscFree(vals);CHKERRQ(ierr);
166586d161a7SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166686d161a7SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166786d161a7SShri Abhyankar   PetscFunctionReturn(0);
166886d161a7SShri Abhyankar }
166986d161a7SShri Abhyankar 
1670112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
167186d161a7SShri Abhyankar {
1672dfdea288SBarry Smith   Mat_MPIDense   *a;
167386d161a7SShri Abhyankar   PetscScalar    *vals,*svals;
1674ce94432eSBarry Smith   MPI_Comm       comm;
167586d161a7SShri Abhyankar   MPI_Status     status;
167649467e7dSBarry Smith   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
167786d161a7SShri Abhyankar   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
167849467e7dSBarry Smith   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
16799d36ed5fSBarry Smith   PetscInt       i,nz,j,rstart,rend;
168086d161a7SShri Abhyankar   int            fd;
16817f489da9SVaclav Hapla   PetscBool      isbinary;
168286d161a7SShri Abhyankar   PetscErrorCode ierr;
168386d161a7SShri Abhyankar 
168486d161a7SShri Abhyankar   PetscFunctionBegin;
16857f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
16867f489da9SVaclav Hapla   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
16877f489da9SVaclav Hapla 
1688c98fd787SBarry Smith   /* force binary viewer to load .info file if it has not yet done so */
1689c98fd787SBarry Smith   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1690ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
169186d161a7SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
169286d161a7SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
169386d161a7SShri Abhyankar   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
16945872f025SBarry Smith   if (!rank) {
169586d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
169686d161a7SShri Abhyankar     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
169786d161a7SShri Abhyankar   }
169886d161a7SShri Abhyankar   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
169986d161a7SShri Abhyankar   M    = header[1]; N = header[2]; nz = header[3];
170086d161a7SShri Abhyankar 
170186d161a7SShri Abhyankar   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
17029d36ed5fSBarry Smith   if (newmat->rmap->N < 0) newmat->rmap->N = M;
17039d36ed5fSBarry Smith   if (newmat->cmap->N < 0) newmat->cmap->N = N;
170486d161a7SShri Abhyankar 
17059d36ed5fSBarry Smith   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
17069d36ed5fSBarry Smith   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
170786d161a7SShri Abhyankar 
170886d161a7SShri Abhyankar   /*
170986d161a7SShri Abhyankar        Handle case where matrix is stored on disk as a dense matrix
171086d161a7SShri Abhyankar   */
171186d161a7SShri Abhyankar   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
17129d36ed5fSBarry Smith     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
171386d161a7SShri Abhyankar     PetscFunctionReturn(0);
171486d161a7SShri Abhyankar   }
171586d161a7SShri Abhyankar 
171686d161a7SShri Abhyankar   /* determine ownership of all rows */
17172205254eSKarl Rupp   if (newmat->rmap->n < 0) {
17182205254eSKarl Rupp     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
17192205254eSKarl Rupp   } else {
17202205254eSKarl Rupp     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
17212205254eSKarl Rupp   }
172249467e7dSBarry Smith   if (newmat->cmap->n < 0) {
172349467e7dSBarry Smith     n = PETSC_DECIDE;
172449467e7dSBarry Smith   } else {
172549467e7dSBarry Smith     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
172649467e7dSBarry Smith   }
172749467e7dSBarry Smith 
1728854ce69bSBarry Smith   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
172986d161a7SShri Abhyankar   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
173086d161a7SShri Abhyankar   rowners[0] = 0;
173186d161a7SShri Abhyankar   for (i=2; i<=size; i++) {
173286d161a7SShri Abhyankar     rowners[i] += rowners[i-1];
173386d161a7SShri Abhyankar   }
173486d161a7SShri Abhyankar   rstart = rowners[rank];
173586d161a7SShri Abhyankar   rend   = rowners[rank+1];
173686d161a7SShri Abhyankar 
173786d161a7SShri Abhyankar   /* distribute row lengths to all processors */
173849467e7dSBarry Smith   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
173986d161a7SShri Abhyankar   if (!rank) {
1740785e854fSJed Brown     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
174186d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1742785e854fSJed Brown     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
174386d161a7SShri Abhyankar     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
174486d161a7SShri Abhyankar     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
174586d161a7SShri Abhyankar     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
174686d161a7SShri Abhyankar   } else {
174786d161a7SShri Abhyankar     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
174886d161a7SShri Abhyankar   }
174986d161a7SShri Abhyankar 
175086d161a7SShri Abhyankar   if (!rank) {
175186d161a7SShri Abhyankar     /* calculate the number of nonzeros on each processor */
1752785e854fSJed Brown     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
175386d161a7SShri Abhyankar     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
175486d161a7SShri Abhyankar     for (i=0; i<size; i++) {
175586d161a7SShri Abhyankar       for (j=rowners[i]; j< rowners[i+1]; j++) {
175686d161a7SShri Abhyankar         procsnz[i] += rowlengths[j];
175786d161a7SShri Abhyankar       }
175886d161a7SShri Abhyankar     }
175986d161a7SShri Abhyankar     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
176086d161a7SShri Abhyankar 
176186d161a7SShri Abhyankar     /* determine max buffer needed and allocate it */
176286d161a7SShri Abhyankar     maxnz = 0;
176386d161a7SShri Abhyankar     for (i=0; i<size; i++) {
176486d161a7SShri Abhyankar       maxnz = PetscMax(maxnz,procsnz[i]);
176586d161a7SShri Abhyankar     }
1766785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
176786d161a7SShri Abhyankar 
176886d161a7SShri Abhyankar     /* read in my part of the matrix column indices  */
176986d161a7SShri Abhyankar     nz   = procsnz[0];
1770785e854fSJed Brown     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
177186d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
177286d161a7SShri Abhyankar 
177386d161a7SShri Abhyankar     /* read in every one elses and ship off */
177486d161a7SShri Abhyankar     for (i=1; i<size; i++) {
177586d161a7SShri Abhyankar       nz   = procsnz[i];
177686d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
177786d161a7SShri Abhyankar       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
177886d161a7SShri Abhyankar     }
177986d161a7SShri Abhyankar     ierr = PetscFree(cols);CHKERRQ(ierr);
178086d161a7SShri Abhyankar   } else {
178186d161a7SShri Abhyankar     /* determine buffer space needed for message */
178286d161a7SShri Abhyankar     nz = 0;
178386d161a7SShri Abhyankar     for (i=0; i<m; i++) {
178486d161a7SShri Abhyankar       nz += ourlens[i];
178586d161a7SShri Abhyankar     }
1786854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
178786d161a7SShri Abhyankar 
178886d161a7SShri Abhyankar     /* receive message of column indices*/
178986d161a7SShri Abhyankar     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
179086d161a7SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
179186d161a7SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
179286d161a7SShri Abhyankar   }
179386d161a7SShri Abhyankar 
179449467e7dSBarry Smith   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1795dfdea288SBarry Smith   a = (Mat_MPIDense*)newmat->data;
1796430ada49SBarry Smith   if (!a->A) {
17970298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1798dfdea288SBarry Smith   }
179986d161a7SShri Abhyankar 
180086d161a7SShri Abhyankar   if (!rank) {
1801785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
180286d161a7SShri Abhyankar 
180386d161a7SShri Abhyankar     /* read in my part of the matrix numerical values  */
180486d161a7SShri Abhyankar     nz   = procsnz[0];
180586d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
180686d161a7SShri Abhyankar 
180786d161a7SShri Abhyankar     /* insert into matrix */
180886d161a7SShri Abhyankar     jj      = rstart;
180986d161a7SShri Abhyankar     smycols = mycols;
181086d161a7SShri Abhyankar     svals   = vals;
181186d161a7SShri Abhyankar     for (i=0; i<m; i++) {
181286d161a7SShri Abhyankar       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
181386d161a7SShri Abhyankar       smycols += ourlens[i];
181486d161a7SShri Abhyankar       svals   += ourlens[i];
181586d161a7SShri Abhyankar       jj++;
181686d161a7SShri Abhyankar     }
181786d161a7SShri Abhyankar 
181886d161a7SShri Abhyankar     /* read in other processors and ship out */
181986d161a7SShri Abhyankar     for (i=1; i<size; i++) {
182086d161a7SShri Abhyankar       nz   = procsnz[i];
182186d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
182286d161a7SShri Abhyankar       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
182386d161a7SShri Abhyankar     }
182486d161a7SShri Abhyankar     ierr = PetscFree(procsnz);CHKERRQ(ierr);
182586d161a7SShri Abhyankar   } else {
182686d161a7SShri Abhyankar     /* receive numeric values */
1827854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
182886d161a7SShri Abhyankar 
182986d161a7SShri Abhyankar     /* receive message of values*/
183086d161a7SShri Abhyankar     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
183186d161a7SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
183286d161a7SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
183386d161a7SShri Abhyankar 
183486d161a7SShri Abhyankar     /* insert into matrix */
183586d161a7SShri Abhyankar     jj      = rstart;
183686d161a7SShri Abhyankar     smycols = mycols;
183786d161a7SShri Abhyankar     svals   = vals;
183886d161a7SShri Abhyankar     for (i=0; i<m; i++) {
183986d161a7SShri Abhyankar       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
184086d161a7SShri Abhyankar       smycols += ourlens[i];
184186d161a7SShri Abhyankar       svals   += ourlens[i];
184286d161a7SShri Abhyankar       jj++;
184386d161a7SShri Abhyankar     }
184486d161a7SShri Abhyankar   }
184549467e7dSBarry Smith   ierr = PetscFree(ourlens);CHKERRQ(ierr);
184686d161a7SShri Abhyankar   ierr = PetscFree(vals);CHKERRQ(ierr);
184786d161a7SShri Abhyankar   ierr = PetscFree(mycols);CHKERRQ(ierr);
184886d161a7SShri Abhyankar   ierr = PetscFree(rowners);CHKERRQ(ierr);
184986d161a7SShri Abhyankar 
185086d161a7SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
185186d161a7SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
185286d161a7SShri Abhyankar   PetscFunctionReturn(0);
185386d161a7SShri Abhyankar }
185486d161a7SShri Abhyankar 
1855ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
18566e4ee0c6SHong Zhang {
18576e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
18586e4ee0c6SHong Zhang   Mat            a,b;
1859ace3abfcSBarry Smith   PetscBool      flg;
18606e4ee0c6SHong Zhang   PetscErrorCode ierr;
186190ace30eSBarry Smith 
18626e4ee0c6SHong Zhang   PetscFunctionBegin;
18636e4ee0c6SHong Zhang   a    = matA->A;
18646e4ee0c6SHong Zhang   b    = matB->A;
18656e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1866b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
18676e4ee0c6SHong Zhang   PetscFunctionReturn(0);
18686e4ee0c6SHong Zhang }
186990ace30eSBarry Smith 
1870baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1871baa3c1c6SHong Zhang {
1872baa3c1c6SHong Zhang   PetscErrorCode        ierr;
1873baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1874baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = a->atbdense;
1875baa3c1c6SHong Zhang 
1876baa3c1c6SHong Zhang   PetscFunctionBegin;
1877c5ef1628SHong Zhang   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1878baa3c1c6SHong Zhang   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1879baa3c1c6SHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
1880baa3c1c6SHong Zhang   PetscFunctionReturn(0);
1881baa3c1c6SHong Zhang }
1882baa3c1c6SHong Zhang 
1883*cc48ffa7SToby Isaac PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1884*cc48ffa7SToby Isaac {
1885*cc48ffa7SToby Isaac   PetscErrorCode        ierr;
1886*cc48ffa7SToby Isaac   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1887*cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt = a->abtdense;
1888*cc48ffa7SToby Isaac 
1889*cc48ffa7SToby Isaac   PetscFunctionBegin;
1890*cc48ffa7SToby Isaac   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1891*cc48ffa7SToby Isaac   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1892*cc48ffa7SToby Isaac   ierr = PetscFree(abt);CHKERRQ(ierr);
1893*cc48ffa7SToby Isaac   PetscFunctionReturn(0);
1894*cc48ffa7SToby Isaac }
1895*cc48ffa7SToby Isaac 
1896cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1897cb20be35SHong Zhang {
1898baa3c1c6SHong Zhang   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1899660d5466SHong Zhang   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1900baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = c->atbdense;
1901cb20be35SHong Zhang   PetscErrorCode ierr;
1902cb20be35SHong Zhang   MPI_Comm       comm;
1903d5017740SHong Zhang   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1904c5ef1628SHong Zhang   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1905d5017740SHong Zhang   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1906660d5466SHong Zhang   PetscScalar    _DOne=1.0,_DZero=0.0;
1907adc7a786SHong Zhang   PetscBLASInt   am,an,bn,aN;
1908e68c0b26SHong Zhang   const PetscInt *ranges;
1909cb20be35SHong Zhang 
1910cb20be35SHong Zhang   PetscFunctionBegin;
1911cb20be35SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1912cb20be35SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1913cb20be35SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1914e68c0b26SHong Zhang 
1915c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
1916660d5466SHong Zhang   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1917660d5466SHong Zhang   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1918660d5466SHong Zhang   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1919adc7a786SHong Zhang   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1920adc7a786SHong Zhang   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1921cb20be35SHong Zhang 
1922cb20be35SHong Zhang   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1923c5ef1628SHong Zhang   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1924cb20be35SHong Zhang 
1925660d5466SHong Zhang   /* arrange atbarray into sendbuf */
1926cb20be35SHong Zhang   k = 0;
1927cb20be35SHong Zhang   for (proc=0; proc<size; proc++) {
1928baa3c1c6SHong Zhang     for (j=0; j<cN; j++) {
1929c5ef1628SHong Zhang       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1930cb20be35SHong Zhang     }
1931cb20be35SHong Zhang   }
1932c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
1933660d5466SHong Zhang   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
19343462b7efSHong Zhang   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1935660d5466SHong Zhang   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1936cb20be35SHong Zhang   PetscFunctionReturn(0);
1937cb20be35SHong Zhang }
1938cb20be35SHong Zhang 
1939cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1940cb20be35SHong Zhang {
1941cb20be35SHong Zhang   PetscErrorCode        ierr;
1942baa3c1c6SHong Zhang   Mat                   Cdense;
1943cb20be35SHong Zhang   MPI_Comm              comm;
1944baa3c1c6SHong Zhang   PetscMPIInt           size;
1945660d5466SHong Zhang   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1946baa3c1c6SHong Zhang   Mat_MPIDense          *c;
1947baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
1948cb20be35SHong Zhang 
1949cb20be35SHong Zhang   PetscFunctionBegin;
1950baa3c1c6SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1951cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1952cb20be35SHong Zhang     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1953cb20be35SHong Zhang   }
1954cb20be35SHong Zhang 
1955baa3c1c6SHong Zhang   /* create matrix product Cdense */
1956baa3c1c6SHong Zhang   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1957660d5466SHong Zhang   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1958baa3c1c6SHong Zhang   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1959baa3c1c6SHong Zhang   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1960baa3c1c6SHong Zhang   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961baa3c1c6SHong Zhang   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962baa3c1c6SHong Zhang   *C   = Cdense;
1963baa3c1c6SHong Zhang 
1964baa3c1c6SHong Zhang   /* create data structure for reuse Cdense */
1965baa3c1c6SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1966baa3c1c6SHong Zhang   ierr = PetscNew(&atb);CHKERRQ(ierr);
1967660d5466SHong Zhang   cM = Cdense->rmap->N;
1968c5ef1628SHong Zhang   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1969baa3c1c6SHong Zhang 
1970baa3c1c6SHong Zhang   c                    = (Mat_MPIDense*)Cdense->data;
1971baa3c1c6SHong Zhang   c->atbdense          = atb;
1972baa3c1c6SHong Zhang   atb->destroy         = Cdense->ops->destroy;
1973baa3c1c6SHong Zhang   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1974cb20be35SHong Zhang   PetscFunctionReturn(0);
1975cb20be35SHong Zhang }
1976cb20be35SHong Zhang 
1977cb20be35SHong Zhang PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1978cb20be35SHong Zhang {
1979cb20be35SHong Zhang   PetscErrorCode ierr;
1980cb20be35SHong Zhang 
1981cb20be35SHong Zhang   PetscFunctionBegin;
1982cb20be35SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
1983cb20be35SHong Zhang     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1984cb20be35SHong Zhang   }
1985cb20be35SHong Zhang   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1986cb20be35SHong Zhang   PetscFunctionReturn(0);
1987cb20be35SHong Zhang }
1988320f2790SHong Zhang 
1989*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
1990*cc48ffa7SToby Isaac {
1991*cc48ffa7SToby Isaac   PetscErrorCode        ierr;
1992*cc48ffa7SToby Isaac   Mat                   Cdense;
1993*cc48ffa7SToby Isaac   MPI_Comm              comm;
1994*cc48ffa7SToby Isaac   PetscMPIInt           i, size;
1995*cc48ffa7SToby Isaac   PetscInt              maxRows, bufsiz;
1996*cc48ffa7SToby Isaac   Mat_MPIDense          *c;
1997*cc48ffa7SToby Isaac   PetscMPIInt           tag;
1998*cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt;
1999*cc48ffa7SToby Isaac 
2000*cc48ffa7SToby Isaac   PetscFunctionBegin;
2001*cc48ffa7SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2002*cc48ffa7SToby Isaac   if (A->cmap->N != B->cmap->N) {
2003*cc48ffa7SToby Isaac     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2004*cc48ffa7SToby Isaac   }
2005*cc48ffa7SToby Isaac 
2006*cc48ffa7SToby Isaac   /* create matrix product Cdense */
2007*cc48ffa7SToby Isaac   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2008*cc48ffa7SToby Isaac   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2009*cc48ffa7SToby Isaac   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2010*cc48ffa7SToby Isaac   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2011*cc48ffa7SToby Isaac   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2012*cc48ffa7SToby Isaac   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013*cc48ffa7SToby Isaac   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014*cc48ffa7SToby Isaac   *C   = Cdense;
2015*cc48ffa7SToby Isaac 
2016*cc48ffa7SToby Isaac   /* create data structure for reuse Cdense */
2017*cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2018*cc48ffa7SToby Isaac   ierr = PetscNew(&abt);CHKERRQ(ierr);
2019*cc48ffa7SToby Isaac   abt->tag = tag;
2020*cc48ffa7SToby Isaac   for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2021*cc48ffa7SToby Isaac   bufsiz = A->cmap->N * maxRows;
2022*cc48ffa7SToby Isaac   ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2023*cc48ffa7SToby Isaac 
2024*cc48ffa7SToby Isaac   c                    = (Mat_MPIDense*)Cdense->data;
2025*cc48ffa7SToby Isaac   c->abtdense          = abt;
2026*cc48ffa7SToby Isaac   abt->destroy         = Cdense->ops->destroy;
2027*cc48ffa7SToby Isaac   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2028*cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2029*cc48ffa7SToby Isaac }
2030*cc48ffa7SToby Isaac 
2031*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2032*cc48ffa7SToby Isaac {
2033*cc48ffa7SToby Isaac   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2034*cc48ffa7SToby Isaac   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2035*cc48ffa7SToby Isaac   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2036*cc48ffa7SToby Isaac   Mat_MatTransMultDense *abt = c->abtdense;
2037*cc48ffa7SToby Isaac   PetscErrorCode ierr;
2038*cc48ffa7SToby Isaac   MPI_Comm       comm;
2039*cc48ffa7SToby Isaac   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2040*cc48ffa7SToby Isaac   PetscScalar    *sendbuf, *recvbuf, *carray;
2041*cc48ffa7SToby Isaac   PetscInt       i,cK=A->cmap->N,k,j,bn;
2042*cc48ffa7SToby Isaac   PetscScalar    _DOne=1.0,_DZero=0.0;
2043*cc48ffa7SToby Isaac   PetscBLASInt   cm, cn, ck;
2044*cc48ffa7SToby Isaac   MPI_Request    reqs[2];
2045*cc48ffa7SToby Isaac   const PetscInt *ranges;
2046*cc48ffa7SToby Isaac 
2047*cc48ffa7SToby Isaac   PetscFunctionBegin;
2048*cc48ffa7SToby Isaac   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2049*cc48ffa7SToby Isaac   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2050*cc48ffa7SToby Isaac   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2051*cc48ffa7SToby Isaac 
2052*cc48ffa7SToby Isaac   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2053*cc48ffa7SToby Isaac   bn = B->rmap->n;
2054*cc48ffa7SToby Isaac   if (bseq->lda == bn) {
2055*cc48ffa7SToby Isaac     sendbuf = bseq->v;
2056*cc48ffa7SToby Isaac   } else {
2057*cc48ffa7SToby Isaac     sendbuf = abt->buf[0];
2058*cc48ffa7SToby Isaac     for (k = 0, i = 0; i < cK; i++) {
2059*cc48ffa7SToby Isaac       for (j = 0; j < bn; j++, k++) {
2060*cc48ffa7SToby Isaac         sendbuf[k] = bseq->v[i * bseq->lda + j];
2061*cc48ffa7SToby Isaac       }
2062*cc48ffa7SToby Isaac     }
2063*cc48ffa7SToby Isaac   }
2064*cc48ffa7SToby Isaac   if (size > 1) {
2065*cc48ffa7SToby Isaac     sendto = (rank + size - 1) % size;
2066*cc48ffa7SToby Isaac     recvfrom = (rank + size + 1) % size;
2067*cc48ffa7SToby Isaac   } else {
2068*cc48ffa7SToby Isaac     sendto = recvfrom = 0;
2069*cc48ffa7SToby Isaac   }
2070*cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2071*cc48ffa7SToby Isaac   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2072*cc48ffa7SToby Isaac   recvisfrom = rank;
2073*cc48ffa7SToby Isaac   for (i = 0; i < size; i++) {
2074*cc48ffa7SToby Isaac     /* we have finished receiving in sending, bufs can be read/modified */
2075*cc48ffa7SToby Isaac     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2076*cc48ffa7SToby Isaac     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2077*cc48ffa7SToby Isaac 
2078*cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2079*cc48ffa7SToby Isaac       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2080*cc48ffa7SToby Isaac       sendsiz = cK * bn;
2081*cc48ffa7SToby Isaac       recvsiz = cK * nextbn;
2082*cc48ffa7SToby Isaac       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2083*cc48ffa7SToby Isaac       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2084*cc48ffa7SToby Isaac       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2085*cc48ffa7SToby Isaac     }
2086*cc48ffa7SToby Isaac 
2087*cc48ffa7SToby Isaac     /* local aseq * sendbuf^T */
2088*cc48ffa7SToby Isaac     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2089*cc48ffa7SToby Isaac     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2090*cc48ffa7SToby Isaac     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2091*cc48ffa7SToby Isaac 
2092*cc48ffa7SToby Isaac     if (nextrecvisfrom != rank) {
2093*cc48ffa7SToby Isaac       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2094*cc48ffa7SToby Isaac       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2095*cc48ffa7SToby Isaac     }
2096*cc48ffa7SToby Isaac     bn = nextbn;
2097*cc48ffa7SToby Isaac     recvisfrom = nextrecvisfrom;
2098*cc48ffa7SToby Isaac     sendbuf = recvbuf;
2099*cc48ffa7SToby Isaac   }
2100*cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2101*cc48ffa7SToby Isaac }
2102*cc48ffa7SToby Isaac 
2103*cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2104*cc48ffa7SToby Isaac {
2105*cc48ffa7SToby Isaac   PetscErrorCode ierr;
2106*cc48ffa7SToby Isaac 
2107*cc48ffa7SToby Isaac   PetscFunctionBegin;
2108*cc48ffa7SToby Isaac   if (scall == MAT_INITIAL_MATRIX) {
2109*cc48ffa7SToby Isaac     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2110*cc48ffa7SToby Isaac   }
2111*cc48ffa7SToby Isaac   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2112*cc48ffa7SToby Isaac   PetscFunctionReturn(0);
2113*cc48ffa7SToby Isaac }
2114*cc48ffa7SToby Isaac 
2115320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2116320f2790SHong Zhang {
2117320f2790SHong Zhang   PetscErrorCode   ierr;
2118320f2790SHong Zhang   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2119320f2790SHong Zhang   Mat_MatMultDense *ab = a->abdense;
2120320f2790SHong Zhang 
2121320f2790SHong Zhang   PetscFunctionBegin;
2122320f2790SHong Zhang   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2123320f2790SHong Zhang   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2124320f2790SHong Zhang   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2125320f2790SHong Zhang 
2126320f2790SHong Zhang   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2127320f2790SHong Zhang   ierr = PetscFree(ab);CHKERRQ(ierr);
2128320f2790SHong Zhang   PetscFunctionReturn(0);
2129320f2790SHong Zhang }
2130320f2790SHong Zhang 
21315242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
2132320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2133320f2790SHong Zhang {
2134320f2790SHong Zhang   PetscErrorCode   ierr;
2135320f2790SHong Zhang   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2136320f2790SHong Zhang   Mat_MatMultDense *ab=c->abdense;
2137320f2790SHong Zhang 
2138320f2790SHong Zhang   PetscFunctionBegin;
2139de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2140de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2141320f2790SHong Zhang   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2142de0a22f0SHong Zhang   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2143320f2790SHong Zhang   PetscFunctionReturn(0);
2144320f2790SHong Zhang }
2145320f2790SHong Zhang 
2146320f2790SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2147320f2790SHong Zhang {
2148320f2790SHong Zhang   PetscErrorCode   ierr;
2149320f2790SHong Zhang   Mat              Ae,Be,Ce;
2150320f2790SHong Zhang   Mat_MPIDense     *c;
2151320f2790SHong Zhang   Mat_MatMultDense *ab;
2152320f2790SHong Zhang 
2153320f2790SHong Zhang   PetscFunctionBegin;
2154320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2155378336b6SHong Zhang     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2156320f2790SHong Zhang   }
2157320f2790SHong Zhang 
2158320f2790SHong Zhang   /* convert A and B to Elemental matrices Ae and Be */
2159320f2790SHong Zhang   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2160320f2790SHong Zhang   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2161320f2790SHong Zhang 
2162320f2790SHong Zhang   /* Ce = Ae*Be */
2163320f2790SHong Zhang   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2164320f2790SHong Zhang   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2165320f2790SHong Zhang 
2166320f2790SHong Zhang   /* convert Ce to C */
2167320f2790SHong Zhang   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2168320f2790SHong Zhang 
2169320f2790SHong Zhang   /* create data structure for reuse Cdense */
2170320f2790SHong Zhang   ierr = PetscNew(&ab);CHKERRQ(ierr);
2171320f2790SHong Zhang   c                  = (Mat_MPIDense*)(*C)->data;
2172320f2790SHong Zhang   c->abdense         = ab;
2173320f2790SHong Zhang 
2174320f2790SHong Zhang   ab->Ae             = Ae;
2175320f2790SHong Zhang   ab->Be             = Be;
2176320f2790SHong Zhang   ab->Ce             = Ce;
2177320f2790SHong Zhang   ab->destroy        = (*C)->ops->destroy;
2178320f2790SHong Zhang   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
21799775878aSHong Zhang   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2180320f2790SHong Zhang   PetscFunctionReturn(0);
2181320f2790SHong Zhang }
2182320f2790SHong Zhang 
2183150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2184320f2790SHong Zhang {
2185320f2790SHong Zhang   PetscErrorCode ierr;
2186320f2790SHong Zhang 
2187320f2790SHong Zhang   PetscFunctionBegin;
2188320f2790SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
218957299f2fSHong Zhang     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2190320f2790SHong Zhang     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
219157299f2fSHong Zhang     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2192320f2790SHong Zhang   } else {
219357299f2fSHong Zhang     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2194320f2790SHong Zhang     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
219557299f2fSHong Zhang     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2196320f2790SHong Zhang   }
2197320f2790SHong Zhang   PetscFunctionReturn(0);
2198320f2790SHong Zhang }
21995242a7b1SHong Zhang #endif
220086aefd0dSHong Zhang 
2201