xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7dae84e064f15683b0c1756735f0ad1de62763f6)
1be1d678aSKris Buschelman 
2ed3cc1f0SBarry Smith /*
3ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
4ed3cc1f0SBarry Smith */
5ed3cc1f0SBarry Smith 
604fea9ffSBarry Smith 
7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
9baa3c1c6SHong Zhang #include <petscblaslapack.h>
108965ea79SLois Curfman McInnes 
11ab92ecdeSBarry Smith /*@
12ab92ecdeSBarry Smith 
13ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
15ab92ecdeSBarry Smith 
16ab92ecdeSBarry Smith     Input Parameter:
17ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
18ab92ecdeSBarry Smith 
19ab92ecdeSBarry Smith     Output Parameter:
20ab92ecdeSBarry Smith .      B - the inner matrix
21ab92ecdeSBarry Smith 
228e6c10adSSatish Balay     Level: intermediate
238e6c10adSSatish Balay 
24ab92ecdeSBarry Smith @*/
25ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26ab92ecdeSBarry Smith {
27ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28ab92ecdeSBarry Smith   PetscErrorCode ierr;
29ace3abfcSBarry Smith   PetscBool      flg;
30ab92ecdeSBarry Smith 
31ab92ecdeSBarry Smith   PetscFunctionBegin;
32251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
332205254eSKarl Rupp   if (flg) *B = mat->A;
342205254eSKarl Rupp   else *B = A;
35ab92ecdeSBarry Smith   PetscFunctionReturn(0);
36ab92ecdeSBarry Smith }
37ab92ecdeSBarry Smith 
38ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39ba8c8a56SBarry Smith {
40ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41ba8c8a56SBarry Smith   PetscErrorCode ierr;
42d0f46423SBarry Smith   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43ba8c8a56SBarry Smith 
44ba8c8a56SBarry Smith   PetscFunctionBegin;
45e7e72b3dSBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46ba8c8a56SBarry Smith   lrow = row - rstart;
47ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48ba8c8a56SBarry Smith   PetscFunctionReturn(0);
49ba8c8a56SBarry Smith }
50ba8c8a56SBarry Smith 
51ba8c8a56SBarry Smith PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52ba8c8a56SBarry Smith {
53ba8c8a56SBarry Smith   PetscErrorCode ierr;
54ba8c8a56SBarry Smith 
55ba8c8a56SBarry Smith   PetscFunctionBegin;
56ba8c8a56SBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57ba8c8a56SBarry Smith   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58ba8c8a56SBarry Smith   PetscFunctionReturn(0);
59ba8c8a56SBarry Smith }
60ba8c8a56SBarry Smith 
6111bd1e4dSLisandro Dalcin PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
620de54da6SSatish Balay {
630de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
646849ba73SBarry Smith   PetscErrorCode ierr;
65d0f46423SBarry Smith   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
6687828ca2SBarry Smith   PetscScalar    *array;
670de54da6SSatish Balay   MPI_Comm       comm;
6811bd1e4dSLisandro Dalcin   Mat            B;
690de54da6SSatish Balay 
700de54da6SSatish Balay   PetscFunctionBegin;
71e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
720de54da6SSatish Balay 
7311bd1e4dSLisandro Dalcin   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
7411bd1e4dSLisandro Dalcin   if (!B) {
750de54da6SSatish Balay     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
7611bd1e4dSLisandro Dalcin     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
7711bd1e4dSLisandro Dalcin     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
7811bd1e4dSLisandro Dalcin     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
798c778c55SBarry Smith     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
8011bd1e4dSLisandro Dalcin     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
818c778c55SBarry Smith     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
8211bd1e4dSLisandro Dalcin     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8311bd1e4dSLisandro Dalcin     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
8411bd1e4dSLisandro Dalcin     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
8511bd1e4dSLisandro Dalcin     *a   = B;
8611bd1e4dSLisandro Dalcin     ierr = MatDestroy(&B);CHKERRQ(ierr);
872205254eSKarl Rupp   } else *a = B;
880de54da6SSatish Balay   PetscFunctionReturn(0);
890de54da6SSatish Balay }
900de54da6SSatish Balay 
9113f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
928965ea79SLois Curfman McInnes {
9339b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94dfbe8321SBarry Smith   PetscErrorCode ierr;
95d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96ace3abfcSBarry Smith   PetscBool      roworiented = A->roworiented;
978965ea79SLois Curfman McInnes 
983a40ed3dSBarry Smith   PetscFunctionBegin;
998965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1005ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
101e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1028965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1038965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
10439b7565bSBarry Smith       if (roworiented) {
10539b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1063a40ed3dSBarry Smith       } else {
1078965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1085ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
109e32f2f54SBarry Smith           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
11039b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
11139b7565bSBarry Smith         }
1128965ea79SLois Curfman McInnes       }
1132205254eSKarl Rupp     } else if (!A->donotstash) {
1145080c13bSMatthew G Knepley       mat->assembled = PETSC_FALSE;
11539b7565bSBarry Smith       if (roworiented) {
116b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117d36fbae8SSatish Balay       } else {
118b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
11939b7565bSBarry Smith       }
120b49de8d1SLois Curfman McInnes     }
121b49de8d1SLois Curfman McInnes   }
1223a40ed3dSBarry Smith   PetscFunctionReturn(0);
123b49de8d1SLois Curfman McInnes }
124b49de8d1SLois Curfman McInnes 
12513f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126b49de8d1SLois Curfman McInnes {
127b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128dfbe8321SBarry Smith   PetscErrorCode ierr;
129d0f46423SBarry Smith   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130b49de8d1SLois Curfman McInnes 
1313a40ed3dSBarry Smith   PetscFunctionBegin;
132b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
133e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
136b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
137b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
138e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139e7e72b3dSBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141b49de8d1SLois Curfman McInnes       }
142e7e72b3dSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
1438965ea79SLois Curfman McInnes   }
1443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1458965ea79SLois Curfman McInnes }
1468965ea79SLois Curfman McInnes 
1478c778c55SBarry Smith PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148ff14e315SSatish Balay {
149ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150dfbe8321SBarry Smith   PetscErrorCode ierr;
151ff14e315SSatish Balay 
1523a40ed3dSBarry Smith   PetscFunctionBegin;
1538c778c55SBarry Smith   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
1543a40ed3dSBarry Smith   PetscFunctionReturn(0);
155ff14e315SSatish Balay }
156ff14e315SSatish Balay 
157*7dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
158ca3fa75bSLois Curfman McInnes {
159ca3fa75bSLois Curfman McInnes   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
160ca3fa75bSLois Curfman McInnes   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
1616849ba73SBarry Smith   PetscErrorCode ierr;
1624aa3045dSJed Brown   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
1635d0c19d7SBarry Smith   const PetscInt *irow,*icol;
16487828ca2SBarry Smith   PetscScalar    *av,*bv,*v = lmat->v;
165ca3fa75bSLois Curfman McInnes   Mat            newmat;
1664aa3045dSJed Brown   IS             iscol_local;
167ca3fa75bSLois Curfman McInnes 
168ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
1694aa3045dSJed Brown   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
170ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
1714aa3045dSJed Brown   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
172b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
173b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
1744aa3045dSJed Brown   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
175ca3fa75bSLois Curfman McInnes 
176ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
1777eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
1787eba5e9cSLois Curfman McInnes      original matrix! */
179ca3fa75bSLois Curfman McInnes 
180ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
1817eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
182ca3fa75bSLois Curfman McInnes 
183ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
184ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
185e32f2f54SBarry Smith     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
1867eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
187ca3fa75bSLois Curfman McInnes     newmat = *B;
188ca3fa75bSLois Curfman McInnes   } else {
189ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
190ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
1914aa3045dSJed Brown     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
1927adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1930298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
194ca3fa75bSLois Curfman McInnes   }
195ca3fa75bSLois Curfman McInnes 
196ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
197ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
198ca3fa75bSLois Curfman McInnes   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
199ca3fa75bSLois Curfman McInnes 
2004aa3045dSJed Brown   for (i=0; i<Ncols; i++) {
20125a33276SHong Zhang     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
202ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2037eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
204ca3fa75bSLois Curfman McInnes     }
205ca3fa75bSLois Curfman McInnes   }
206ca3fa75bSLois Curfman McInnes 
207ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
208ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
209ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
210ca3fa75bSLois Curfman McInnes 
211ca3fa75bSLois Curfman McInnes   /* Free work space */
212ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
2135bdf786aSShri Abhyankar   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
21432bb1f2dSLisandro Dalcin   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
215ca3fa75bSLois Curfman McInnes   *B   = newmat;
216ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
217ca3fa75bSLois Curfman McInnes }
218ca3fa75bSLois Curfman McInnes 
2198c778c55SBarry Smith PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
220ff14e315SSatish Balay {
22173a71a0fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
22273a71a0fSBarry Smith   PetscErrorCode ierr;
22373a71a0fSBarry Smith 
2243a40ed3dSBarry Smith   PetscFunctionBegin;
2258c778c55SBarry Smith   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
2263a40ed3dSBarry Smith   PetscFunctionReturn(0);
227ff14e315SSatish Balay }
228ff14e315SSatish Balay 
229dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
2308965ea79SLois Curfman McInnes {
23139ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
232ce94432eSBarry Smith   MPI_Comm       comm;
233dfbe8321SBarry Smith   PetscErrorCode ierr;
23413f74950SBarry Smith   PetscInt       nstash,reallocs;
2358965ea79SLois Curfman McInnes   InsertMode     addv;
2368965ea79SLois Curfman McInnes 
2373a40ed3dSBarry Smith   PetscFunctionBegin;
238ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2398965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
240b2566f29SBarry Smith   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
241ce94432eSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
242e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
2438965ea79SLois Curfman McInnes 
244d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
2458798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
246ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
2473a40ed3dSBarry Smith   PetscFunctionReturn(0);
2488965ea79SLois Curfman McInnes }
2498965ea79SLois Curfman McInnes 
250dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
2518965ea79SLois Curfman McInnes {
25239ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
2536849ba73SBarry Smith   PetscErrorCode ierr;
25413f74950SBarry Smith   PetscInt       i,*row,*col,flg,j,rstart,ncols;
25513f74950SBarry Smith   PetscMPIInt    n;
25687828ca2SBarry Smith   PetscScalar    *val;
2578965ea79SLois Curfman McInnes 
2583a40ed3dSBarry Smith   PetscFunctionBegin;
2598965ea79SLois Curfman McInnes   /*  wait on receives */
2607ef1d9bdSSatish Balay   while (1) {
2618798bf22SSatish Balay     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
2627ef1d9bdSSatish Balay     if (!flg) break;
2638965ea79SLois Curfman McInnes 
2647ef1d9bdSSatish Balay     for (i=0; i<n;) {
2657ef1d9bdSSatish Balay       /* Now identify the consecutive vals belonging to the same row */
2662205254eSKarl Rupp       for (j=i,rstart=row[j]; j<n; j++) {
2672205254eSKarl Rupp         if (row[j] != rstart) break;
2682205254eSKarl Rupp       }
2697ef1d9bdSSatish Balay       if (j < n) ncols = j-i;
2707ef1d9bdSSatish Balay       else       ncols = n-i;
2717ef1d9bdSSatish Balay       /* Now assemble all these values with a single function call */
2724b4eb8d3SJed Brown       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
2737ef1d9bdSSatish Balay       i    = j;
2748965ea79SLois Curfman McInnes     }
2757ef1d9bdSSatish Balay   }
2768798bf22SSatish Balay   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
2778965ea79SLois Curfman McInnes 
27839ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
27939ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
2808965ea79SLois Curfman McInnes 
2816d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
28239ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2838965ea79SLois Curfman McInnes   }
2843a40ed3dSBarry Smith   PetscFunctionReturn(0);
2858965ea79SLois Curfman McInnes }
2868965ea79SLois Curfman McInnes 
287dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
2888965ea79SLois Curfman McInnes {
289dfbe8321SBarry Smith   PetscErrorCode ierr;
29039ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
2913a40ed3dSBarry Smith 
2923a40ed3dSBarry Smith   PetscFunctionBegin;
2933a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
2943a40ed3dSBarry Smith   PetscFunctionReturn(0);
2958965ea79SLois Curfman McInnes }
2968965ea79SLois Curfman McInnes 
2978965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
2988965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
2998965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
3003501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
3018965ea79SLois Curfman McInnes    routine.
3028965ea79SLois Curfman McInnes */
3032b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
3048965ea79SLois Curfman McInnes {
30539ddd567SLois Curfman McInnes   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
3066849ba73SBarry Smith   PetscErrorCode    ierr;
307d0f46423SBarry Smith   PetscInt          i,*owners = A->rmap->range;
30876ec1555SBarry Smith   PetscInt          *sizes,j,idx,nsends;
30913f74950SBarry Smith   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
3107adad957SLisandro Dalcin   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
31113f74950SBarry Smith   PetscInt          *lens,*lrows,*values;
31213f74950SBarry Smith   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
313ce94432eSBarry Smith   MPI_Comm          comm;
3148965ea79SLois Curfman McInnes   MPI_Request       *send_waits,*recv_waits;
3158965ea79SLois Curfman McInnes   MPI_Status        recv_status,*send_status;
316ace3abfcSBarry Smith   PetscBool         found;
31797b48c8fSBarry Smith   const PetscScalar *xx;
31897b48c8fSBarry Smith   PetscScalar       *bb;
3198965ea79SLois Curfman McInnes 
3203a40ed3dSBarry Smith   PetscFunctionBegin;
321ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
322ce94432eSBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
323b9679d65SBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
3248965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
325f628708eSJed Brown   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
326037dbc42SBarry Smith   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
3278965ea79SLois Curfman McInnes   for (i=0; i<N; i++) {
3288965ea79SLois Curfman McInnes     idx   = rows[i];
32935d8aa7fSBarry Smith     found = PETSC_FALSE;
3308965ea79SLois Curfman McInnes     for (j=0; j<size; j++) {
3318965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
33276ec1555SBarry Smith         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
3338965ea79SLois Curfman McInnes       }
3348965ea79SLois Curfman McInnes     }
335e32f2f54SBarry Smith     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
3368965ea79SLois Curfman McInnes   }
3372205254eSKarl Rupp   nsends = 0;
33876ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
3398965ea79SLois Curfman McInnes 
3408965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
34176ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
3428965ea79SLois Curfman McInnes 
3438965ea79SLois Curfman McInnes   /* post receives:   */
344785e854fSJed Brown   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
345854ce69bSBarry Smith   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
3468965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
34713f74950SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3488965ea79SLois Curfman McInnes   }
3498965ea79SLois Curfman McInnes 
3508965ea79SLois Curfman McInnes   /* do sends:
3518965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
3528965ea79SLois Curfman McInnes          the ith processor
3538965ea79SLois Curfman McInnes   */
354854ce69bSBarry Smith   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
355854ce69bSBarry Smith   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
356854ce69bSBarry Smith   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
3578965ea79SLois Curfman McInnes 
3588965ea79SLois Curfman McInnes   starts[0] = 0;
35976ec1555SBarry Smith   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
3602205254eSKarl Rupp   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
3612205254eSKarl Rupp 
3622205254eSKarl Rupp   starts[0] = 0;
36376ec1555SBarry Smith   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
3648965ea79SLois Curfman McInnes   count = 0;
3658965ea79SLois Curfman McInnes   for (i=0; i<size; i++) {
36676ec1555SBarry Smith     if (sizes[2*i+1]) {
36776ec1555SBarry Smith       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
3688965ea79SLois Curfman McInnes     }
3698965ea79SLois Curfman McInnes   }
370606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
3718965ea79SLois Curfman McInnes 
3728965ea79SLois Curfman McInnes   base = owners[rank];
3738965ea79SLois Curfman McInnes 
3748965ea79SLois Curfman McInnes   /*  wait on receives */
375dcca6d9dSJed Brown   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
37674ed9c26SBarry Smith   count = nrecvs;
37774ed9c26SBarry Smith   slen  = 0;
3788965ea79SLois Curfman McInnes   while (count) {
379ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
3808965ea79SLois Curfman McInnes     /* unpack receives into our local space */
38113f74950SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
3822205254eSKarl Rupp 
3838965ea79SLois Curfman McInnes     source[imdex] = recv_status.MPI_SOURCE;
3848965ea79SLois Curfman McInnes     lens[imdex]   = n;
3858965ea79SLois Curfman McInnes     slen += n;
3868965ea79SLois Curfman McInnes     count--;
3878965ea79SLois Curfman McInnes   }
388606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
3898965ea79SLois Curfman McInnes 
3908965ea79SLois Curfman McInnes   /* move the data into the send scatter */
391854ce69bSBarry Smith   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
3928965ea79SLois Curfman McInnes   count = 0;
3938965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
3948965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
3958965ea79SLois Curfman McInnes     for (j=0; j<lens[i]; j++) {
3968965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
3978965ea79SLois Curfman McInnes     }
3988965ea79SLois Curfman McInnes   }
399606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
40074ed9c26SBarry Smith   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
401606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
40276ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
4038965ea79SLois Curfman McInnes 
40497b48c8fSBarry Smith   /* fix right hand side if needed */
40597b48c8fSBarry Smith   if (x && b) {
40697b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
40797b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
40897b48c8fSBarry Smith     for (i=0; i<slen; i++) {
40997b48c8fSBarry Smith       bb[lrows[i]] = diag*xx[lrows[i]];
41097b48c8fSBarry Smith     }
41197b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
41297b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
41397b48c8fSBarry Smith   }
41497b48c8fSBarry Smith 
4158965ea79SLois Curfman McInnes   /* actually zap the local rows */
416b9679d65SBarry Smith   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
417e2eb51b1SBarry Smith   if (diag != 0.0) {
418b9679d65SBarry Smith     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
419b9679d65SBarry Smith     PetscInt     m   = ll->lda, i;
420b9679d65SBarry Smith 
421b9679d65SBarry Smith     for (i=0; i<slen; i++) {
422b9679d65SBarry Smith       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
423b9679d65SBarry Smith     }
424b9679d65SBarry Smith   }
425606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4268965ea79SLois Curfman McInnes 
4278965ea79SLois Curfman McInnes   /* wait on sends */
4288965ea79SLois Curfman McInnes   if (nsends) {
429785e854fSJed Brown     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
430ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
431606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
4328965ea79SLois Curfman McInnes   }
433606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
434606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
4353a40ed3dSBarry Smith   PetscFunctionReturn(0);
4368965ea79SLois Curfman McInnes }
4378965ea79SLois Curfman McInnes 
438cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
439cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
440cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
442cc2e6a90SBarry Smith 
443dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4448965ea79SLois Curfman McInnes {
44539ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
446dfbe8321SBarry Smith   PetscErrorCode ierr;
447c456f294SBarry Smith 
4483a40ed3dSBarry Smith   PetscFunctionBegin;
449ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
450ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
451cc2e6a90SBarry Smith   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4523a40ed3dSBarry Smith   PetscFunctionReturn(0);
4538965ea79SLois Curfman McInnes }
4548965ea79SLois Curfman McInnes 
455dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4568965ea79SLois Curfman McInnes {
45739ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
458dfbe8321SBarry Smith   PetscErrorCode ierr;
459c456f294SBarry Smith 
4603a40ed3dSBarry Smith   PetscFunctionBegin;
461ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
462ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
463cc2e6a90SBarry Smith   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
4643a40ed3dSBarry Smith   PetscFunctionReturn(0);
4658965ea79SLois Curfman McInnes }
4668965ea79SLois Curfman McInnes 
467dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
468096963f5SLois Curfman McInnes {
469096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
470dfbe8321SBarry Smith   PetscErrorCode ierr;
47187828ca2SBarry Smith   PetscScalar    zero = 0.0;
472096963f5SLois Curfman McInnes 
4733a40ed3dSBarry Smith   PetscFunctionBegin;
4742dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
475cc2e6a90SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
476ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
477ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4783a40ed3dSBarry Smith   PetscFunctionReturn(0);
479096963f5SLois Curfman McInnes }
480096963f5SLois Curfman McInnes 
481dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
482096963f5SLois Curfman McInnes {
483096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
484dfbe8321SBarry Smith   PetscErrorCode ierr;
485096963f5SLois Curfman McInnes 
4863a40ed3dSBarry Smith   PetscFunctionBegin;
4873501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
488cc2e6a90SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
489ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
490ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4913a40ed3dSBarry Smith   PetscFunctionReturn(0);
492096963f5SLois Curfman McInnes }
493096963f5SLois Curfman McInnes 
494dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
4958965ea79SLois Curfman McInnes {
49639ddd567SLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
497096963f5SLois Curfman McInnes   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
498dfbe8321SBarry Smith   PetscErrorCode ierr;
499d0f46423SBarry Smith   PetscInt       len,i,n,m = A->rmap->n,radd;
50087828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
501ed3cc1f0SBarry Smith 
5023a40ed3dSBarry Smith   PetscFunctionBegin;
5032dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
5041ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
505096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
506e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
507d0f46423SBarry Smith   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
508d0f46423SBarry Smith   radd = A->rmap->rstart*m;
50944cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
510096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
511096963f5SLois Curfman McInnes   }
5121ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5133a40ed3dSBarry Smith   PetscFunctionReturn(0);
5148965ea79SLois Curfman McInnes }
5158965ea79SLois Curfman McInnes 
516dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5178965ea79SLois Curfman McInnes {
5183501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
519dfbe8321SBarry Smith   PetscErrorCode ierr;
520ed3cc1f0SBarry Smith 
5213a40ed3dSBarry Smith   PetscFunctionBegin;
522aa482453SBarry Smith #if defined(PETSC_USE_LOG)
523d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
5248965ea79SLois Curfman McInnes #endif
5258798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
5266bf464f9SBarry Smith   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
5276bf464f9SBarry Smith   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
5286bf464f9SBarry Smith   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
52901b82886SBarry Smith 
530bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
531dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
5328baccfbdSHong Zhang 
5338baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
5348baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
5358baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
5368baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
5378baccfbdSHong Zhang #endif
538bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
539bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
540bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
541bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5428baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5438baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5448baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
5453a40ed3dSBarry Smith   PetscFunctionReturn(0);
5468965ea79SLois Curfman McInnes }
54739ddd567SLois Curfman McInnes 
5486849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
5498965ea79SLois Curfman McInnes {
55039ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
551dfbe8321SBarry Smith   PetscErrorCode    ierr;
552aa05aa95SBarry Smith   PetscViewerFormat format;
553aa05aa95SBarry Smith   int               fd;
554d0f46423SBarry Smith   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
555aa05aa95SBarry Smith   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
556578230a0SSatish Balay   PetscScalar       *work,*v,*vv;
557aa05aa95SBarry Smith   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
5587056b6fcSBarry Smith 
5593a40ed3dSBarry Smith   PetscFunctionBegin;
56039ddd567SLois Curfman McInnes   if (mdn->size == 1) {
56139ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
562aa05aa95SBarry Smith   } else {
563aa05aa95SBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
564ce94432eSBarry Smith     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
565ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
566aa05aa95SBarry Smith 
567aa05aa95SBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
568f4403165SShri Abhyankar     if (format == PETSC_VIEWER_NATIVE) {
569aa05aa95SBarry Smith 
570aa05aa95SBarry Smith       if (!rank) {
571aa05aa95SBarry Smith         /* store the matrix as a dense matrix */
5720700a824SBarry Smith         header[0] = MAT_FILE_CLASSID;
573d0f46423SBarry Smith         header[1] = mat->rmap->N;
574aa05aa95SBarry Smith         header[2] = N;
575aa05aa95SBarry Smith         header[3] = MATRIX_BINARY_FORMAT_DENSE;
576aa05aa95SBarry Smith         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
577aa05aa95SBarry Smith 
578aa05aa95SBarry Smith         /* get largest work array needed for transposing array */
579d0f46423SBarry Smith         mmax = mat->rmap->n;
580aa05aa95SBarry Smith         for (i=1; i<size; i++) {
581d0f46423SBarry Smith           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
5828965ea79SLois Curfman McInnes         }
583785e854fSJed Brown         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
584aa05aa95SBarry Smith 
585aa05aa95SBarry Smith         /* write out local array, by rows */
586d0f46423SBarry Smith         m = mat->rmap->n;
587aa05aa95SBarry Smith         v = a->v;
588aa05aa95SBarry Smith         for (j=0; j<N; j++) {
589aa05aa95SBarry Smith           for (i=0; i<m; i++) {
590578230a0SSatish Balay             work[j + i*N] = *v++;
591aa05aa95SBarry Smith           }
592aa05aa95SBarry Smith         }
593aa05aa95SBarry Smith         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
594aa05aa95SBarry Smith         /* get largest work array to receive messages from other processes, excludes process zero */
595aa05aa95SBarry Smith         mmax = 0;
596aa05aa95SBarry Smith         for (i=1; i<size; i++) {
597d0f46423SBarry Smith           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
598aa05aa95SBarry Smith         }
599785e854fSJed Brown         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
600aa05aa95SBarry Smith         for (k = 1; k < size; k++) {
601f8009846SMatthew Knepley           v    = vv;
602d0f46423SBarry Smith           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
603ce94432eSBarry Smith           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
604aa05aa95SBarry Smith 
605aa05aa95SBarry Smith           for (j = 0; j < N; j++) {
606aa05aa95SBarry Smith             for (i = 0; i < m; i++) {
607578230a0SSatish Balay               work[j + i*N] = *v++;
608aa05aa95SBarry Smith             }
609aa05aa95SBarry Smith           }
610aa05aa95SBarry Smith           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
611aa05aa95SBarry Smith         }
612aa05aa95SBarry Smith         ierr = PetscFree(work);CHKERRQ(ierr);
613578230a0SSatish Balay         ierr = PetscFree(vv);CHKERRQ(ierr);
614aa05aa95SBarry Smith       } else {
615ce94432eSBarry Smith         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
616aa05aa95SBarry Smith       }
6176a9046bcSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
618aa05aa95SBarry Smith   }
6193a40ed3dSBarry Smith   PetscFunctionReturn(0);
6208965ea79SLois Curfman McInnes }
6218965ea79SLois Curfman McInnes 
6227da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
6239804daf3SBarry Smith #include <petscdraw.h>
6246849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
6258965ea79SLois Curfman McInnes {
62639ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
627dfbe8321SBarry Smith   PetscErrorCode    ierr;
6287da1fb6eSBarry Smith   PetscMPIInt       rank = mdn->rank;
62919fd82e9SBarry Smith   PetscViewerType   vtype;
630ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
631b0a32e0cSBarry Smith   PetscViewer       sviewer;
632f3ef73ceSBarry Smith   PetscViewerFormat format;
6338965ea79SLois Curfman McInnes 
6343a40ed3dSBarry Smith   PetscFunctionBegin;
635251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
636251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
63732077d6dSBarry Smith   if (iascii) {
638b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
639b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
640456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6414e220ebcSLois Curfman McInnes       MatInfo info;
642888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
6431575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
6447b23a99aSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
645b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6461575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
6475d00a290SHong Zhang       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
6483a40ed3dSBarry Smith       PetscFunctionReturn(0);
649fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6503a40ed3dSBarry Smith       PetscFunctionReturn(0);
6518965ea79SLois Curfman McInnes     }
652f1af5d2fSBarry Smith   } else if (isdraw) {
653b0a32e0cSBarry Smith     PetscDraw draw;
654ace3abfcSBarry Smith     PetscBool isnull;
655f1af5d2fSBarry Smith 
656b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
657b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
658f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
659f1af5d2fSBarry Smith   }
66077ed5343SBarry Smith 
6617da1fb6eSBarry Smith   {
6628965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6638965ea79SLois Curfman McInnes     Mat         A;
664d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
665ba8c8a56SBarry Smith     PetscInt    *cols;
666ba8c8a56SBarry Smith     PetscScalar *vals;
6678965ea79SLois Curfman McInnes 
668ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
6698965ea79SLois Curfman McInnes     if (!rank) {
670f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
6713a40ed3dSBarry Smith     } else {
672f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
6738965ea79SLois Curfman McInnes     }
6747adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
675878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
6760298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
6773bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
6788965ea79SLois Curfman McInnes 
67939ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
68039ddd567SLois Curfman McInnes        but it's quick for now */
68151022da4SBarry Smith     A->insertmode = INSERT_VALUES;
6822205254eSKarl Rupp 
6832205254eSKarl Rupp     row = mat->rmap->rstart;
6842205254eSKarl Rupp     m   = mdn->A->rmap->n;
6858965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
686ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
687ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
688ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
68939ddd567SLois Curfman McInnes       row++;
6908965ea79SLois Curfman McInnes     }
6918965ea79SLois Curfman McInnes 
6926d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6936d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6943f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
695b9b97703SBarry Smith     if (!rank) {
6961a9d3c3cSBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
6977da1fb6eSBarry Smith       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
6988965ea79SLois Curfman McInnes     }
6993f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
700b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
7016bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
7028965ea79SLois Curfman McInnes   }
7033a40ed3dSBarry Smith   PetscFunctionReturn(0);
7048965ea79SLois Curfman McInnes }
7058965ea79SLois Curfman McInnes 
706dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
7078965ea79SLois Curfman McInnes {
708dfbe8321SBarry Smith   PetscErrorCode ierr;
709ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw,issocket;
7108965ea79SLois Curfman McInnes 
711433994e6SBarry Smith   PetscFunctionBegin;
712251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
713251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
714251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
715251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
7160f5bd95cSBarry Smith 
71732077d6dSBarry Smith   if (iascii || issocket || isdraw) {
718f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
7190f5bd95cSBarry Smith   } else if (isbinary) {
7203a40ed3dSBarry Smith     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
72111aeaf0aSBarry Smith   }
7223a40ed3dSBarry Smith   PetscFunctionReturn(0);
7238965ea79SLois Curfman McInnes }
7248965ea79SLois Curfman McInnes 
725dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
7268965ea79SLois Curfman McInnes {
7273501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
7283501a2bdSLois Curfman McInnes   Mat            mdn  = mat->A;
729dfbe8321SBarry Smith   PetscErrorCode ierr;
730329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
7318965ea79SLois Curfman McInnes 
7323a40ed3dSBarry Smith   PetscFunctionBegin;
7334e220ebcSLois Curfman McInnes   info->block_size = 1.0;
7342205254eSKarl Rupp 
7354e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
7362205254eSKarl Rupp 
7374e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
7384e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
7398965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7404e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7414e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7424e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7434e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7444e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7458965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
746b2566f29SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7472205254eSKarl Rupp 
7484e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7494e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7504e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7514e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7524e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7538965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
754b2566f29SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
7552205254eSKarl Rupp 
7564e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7574e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7584e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7594e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7604e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7618965ea79SLois Curfman McInnes   }
7624e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7634e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7644e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7653a40ed3dSBarry Smith   PetscFunctionReturn(0);
7668965ea79SLois Curfman McInnes }
7678965ea79SLois Curfman McInnes 
768ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
7698965ea79SLois Curfman McInnes {
77039ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
771dfbe8321SBarry Smith   PetscErrorCode ierr;
7728965ea79SLois Curfman McInnes 
7733a40ed3dSBarry Smith   PetscFunctionBegin;
77412c028f9SKris Buschelman   switch (op) {
775512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
77612c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
77712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
77843674050SBarry Smith     MatCheckPreallocated(A,1);
7794e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
78012c028f9SKris Buschelman     break;
78112c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
78243674050SBarry Smith     MatCheckPreallocated(A,1);
7834e0d8c25SBarry Smith     a->roworiented = flg;
7844e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
78512c028f9SKris Buschelman     break;
7864e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
78713fa8e87SLisandro Dalcin   case MAT_KEEP_NONZERO_PATTERN:
78812c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
789290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
79012c028f9SKris Buschelman     break;
79112c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
7924e0d8c25SBarry Smith     a->donotstash = flg;
79312c028f9SKris Buschelman     break;
79477e54ba9SKris Buschelman   case MAT_SYMMETRIC:
79577e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
7969a4540c5SBarry Smith   case MAT_HERMITIAN:
7979a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
798600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
799290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
80077e54ba9SKris Buschelman     break;
80112c028f9SKris Buschelman   default:
802e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
8033a40ed3dSBarry Smith   }
8043a40ed3dSBarry Smith   PetscFunctionReturn(0);
8058965ea79SLois Curfman McInnes }
8068965ea79SLois Curfman McInnes 
8078965ea79SLois Curfman McInnes 
808dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
8095b2fa520SLois Curfman McInnes {
8105b2fa520SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
8115b2fa520SLois Curfman McInnes   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
81287828ca2SBarry Smith   PetscScalar    *l,*r,x,*v;
813dfbe8321SBarry Smith   PetscErrorCode ierr;
814d0f46423SBarry Smith   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
8155b2fa520SLois Curfman McInnes 
8165b2fa520SLois Curfman McInnes   PetscFunctionBegin;
81772d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
8185b2fa520SLois Curfman McInnes   if (ll) {
81972d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
820e32f2f54SBarry Smith     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
8211ebc52fbSHong Zhang     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
8225b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
8235b2fa520SLois Curfman McInnes       x = l[i];
8245b2fa520SLois Curfman McInnes       v = mat->v + i;
8255b2fa520SLois Curfman McInnes       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
8265b2fa520SLois Curfman McInnes     }
8271ebc52fbSHong Zhang     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
828efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8295b2fa520SLois Curfman McInnes   }
8305b2fa520SLois Curfman McInnes   if (rr) {
831175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
832e32f2f54SBarry Smith     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
833ca9f406cSSatish Balay     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834ca9f406cSSatish Balay     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8351ebc52fbSHong Zhang     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
8365b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
8375b2fa520SLois Curfman McInnes       x = r[i];
8385b2fa520SLois Curfman McInnes       v = mat->v + i*m;
8392205254eSKarl Rupp       for (j=0; j<m; j++) (*v++) *= x;
8405b2fa520SLois Curfman McInnes     }
8411ebc52fbSHong Zhang     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
842efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8435b2fa520SLois Curfman McInnes   }
8445b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8455b2fa520SLois Curfman McInnes }
8465b2fa520SLois Curfman McInnes 
847dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
848096963f5SLois Curfman McInnes {
8493501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
8503501a2bdSLois Curfman McInnes   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851dfbe8321SBarry Smith   PetscErrorCode ierr;
85213f74950SBarry Smith   PetscInt       i,j;
853329f5518SBarry Smith   PetscReal      sum = 0.0;
85487828ca2SBarry Smith   PetscScalar    *v  = mat->v;
8553501a2bdSLois Curfman McInnes 
8563a40ed3dSBarry Smith   PetscFunctionBegin;
8573501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
858064f8208SBarry Smith     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
8593501a2bdSLois Curfman McInnes   } else {
8603501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
861d0f46423SBarry Smith       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
862329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
8633501a2bdSLois Curfman McInnes       }
864b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
8658f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
866dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
8673a40ed3dSBarry Smith     } else if (type == NORM_1) {
868329f5518SBarry Smith       PetscReal *tmp,*tmp2;
869dcca6d9dSJed Brown       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
87074ed9c26SBarry Smith       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
87174ed9c26SBarry Smith       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
872064f8208SBarry Smith       *nrm = 0.0;
8733501a2bdSLois Curfman McInnes       v    = mat->v;
874d0f46423SBarry Smith       for (j=0; j<mdn->A->cmap->n; j++) {
875d0f46423SBarry Smith         for (i=0; i<mdn->A->rmap->n; i++) {
87667e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
8773501a2bdSLois Curfman McInnes         }
8783501a2bdSLois Curfman McInnes       }
879b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
880d0f46423SBarry Smith       for (j=0; j<A->cmap->N; j++) {
881064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8823501a2bdSLois Curfman McInnes       }
8838627564fSBarry Smith       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
884d0f46423SBarry Smith       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
8853a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
886329f5518SBarry Smith       PetscReal ntemp;
8873501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
888b2566f29SBarry Smith       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
889ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
8903501a2bdSLois Curfman McInnes   }
8913a40ed3dSBarry Smith   PetscFunctionReturn(0);
8923501a2bdSLois Curfman McInnes }
8933501a2bdSLois Curfman McInnes 
894fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
8953501a2bdSLois Curfman McInnes {
8963501a2bdSLois Curfman McInnes   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
8973501a2bdSLois Curfman McInnes   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
8983501a2bdSLois Curfman McInnes   Mat            B;
899d0f46423SBarry Smith   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
9006849ba73SBarry Smith   PetscErrorCode ierr;
90113f74950SBarry Smith   PetscInt       j,i;
90287828ca2SBarry Smith   PetscScalar    *v;
9033501a2bdSLois Curfman McInnes 
9043a40ed3dSBarry Smith   PetscFunctionBegin;
905cf37664fSBarry Smith   if (reuse == MAT_INPLACE_MATRIX  && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
906cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
907ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
908d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
9097adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
9100298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
911fc4dec0aSBarry Smith   } else {
912fc4dec0aSBarry Smith     B = *matout;
913fc4dec0aSBarry Smith   }
9143501a2bdSLois Curfman McInnes 
915d0f46423SBarry Smith   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
916785e854fSJed Brown   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
9173501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
9181acff37aSSatish Balay   for (j=0; j<n; j++) {
9193501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
9203501a2bdSLois Curfman McInnes     v   += m;
9213501a2bdSLois Curfman McInnes   }
922606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
9236d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9246d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
925cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
9263501a2bdSLois Curfman McInnes     *matout = B;
9273501a2bdSLois Curfman McInnes   } else {
92828be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
9293501a2bdSLois Curfman McInnes   }
9303a40ed3dSBarry Smith   PetscFunctionReturn(0);
931096963f5SLois Curfman McInnes }
932096963f5SLois Curfman McInnes 
93344cd7ae7SLois Curfman McInnes 
9346849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
935d84338a6SBarry Smith extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
9368965ea79SLois Curfman McInnes 
9374994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A)
938273d9f13SBarry Smith {
939dfbe8321SBarry Smith   PetscErrorCode ierr;
940273d9f13SBarry Smith 
941273d9f13SBarry Smith   PetscFunctionBegin;
942273d9f13SBarry Smith   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
943273d9f13SBarry Smith   PetscFunctionReturn(0);
944273d9f13SBarry Smith }
945273d9f13SBarry Smith 
946488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
947488007eeSBarry Smith {
948488007eeSBarry Smith   PetscErrorCode ierr;
949488007eeSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
950488007eeSBarry Smith 
951488007eeSBarry Smith   PetscFunctionBegin;
952488007eeSBarry Smith   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
953a3fa217bSJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
954488007eeSBarry Smith   PetscFunctionReturn(0);
955488007eeSBarry Smith }
956488007eeSBarry Smith 
9577087cfbeSBarry Smith PetscErrorCode  MatConjugate_MPIDense(Mat mat)
958ba337c44SJed Brown {
959ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
960ba337c44SJed Brown   PetscErrorCode ierr;
961ba337c44SJed Brown 
962ba337c44SJed Brown   PetscFunctionBegin;
963ba337c44SJed Brown   ierr = MatConjugate(a->A);CHKERRQ(ierr);
964ba337c44SJed Brown   PetscFunctionReturn(0);
965ba337c44SJed Brown }
966ba337c44SJed Brown 
967ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A)
968ba337c44SJed Brown {
969ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
970ba337c44SJed Brown   PetscErrorCode ierr;
971ba337c44SJed Brown 
972ba337c44SJed Brown   PetscFunctionBegin;
973ba337c44SJed Brown   ierr = MatRealPart(a->A);CHKERRQ(ierr);
974ba337c44SJed Brown   PetscFunctionReturn(0);
975ba337c44SJed Brown }
976ba337c44SJed Brown 
977ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
978ba337c44SJed Brown {
979ba337c44SJed Brown   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
980ba337c44SJed Brown   PetscErrorCode ierr;
981ba337c44SJed Brown 
982ba337c44SJed Brown   PetscFunctionBegin;
983ba337c44SJed Brown   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
984ba337c44SJed Brown   PetscFunctionReturn(0);
985ba337c44SJed Brown }
986ba337c44SJed Brown 
9870716a85fSBarry Smith extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
9880716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
9890716a85fSBarry Smith {
9900716a85fSBarry Smith   PetscErrorCode ierr;
9910716a85fSBarry Smith   PetscInt       i,n;
9920716a85fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
9930716a85fSBarry Smith   PetscReal      *work;
9940716a85fSBarry Smith 
9950716a85fSBarry Smith   PetscFunctionBegin;
9960298fd71SBarry Smith   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
997785e854fSJed Brown   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
9980716a85fSBarry Smith   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
9990716a85fSBarry Smith   if (type == NORM_2) {
10000716a85fSBarry Smith     for (i=0; i<n; i++) work[i] *= work[i];
10010716a85fSBarry Smith   }
10020716a85fSBarry Smith   if (type == NORM_INFINITY) {
1003b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
10040716a85fSBarry Smith   } else {
1005b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
10060716a85fSBarry Smith   }
10070716a85fSBarry Smith   ierr = PetscFree(work);CHKERRQ(ierr);
10080716a85fSBarry Smith   if (type == NORM_2) {
10098f1a2a5eSBarry Smith     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
10100716a85fSBarry Smith   }
10110716a85fSBarry Smith   PetscFunctionReturn(0);
10120716a85fSBarry Smith }
10130716a85fSBarry Smith 
101473a71a0fSBarry Smith static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
101573a71a0fSBarry Smith {
101673a71a0fSBarry Smith   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
101773a71a0fSBarry Smith   PetscErrorCode ierr;
101873a71a0fSBarry Smith   PetscScalar    *a;
101973a71a0fSBarry Smith   PetscInt       m,n,i;
102073a71a0fSBarry Smith 
102173a71a0fSBarry Smith   PetscFunctionBegin;
102273a71a0fSBarry Smith   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
10238c778c55SBarry Smith   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
102473a71a0fSBarry Smith   for (i=0; i<m*n; i++) {
102573a71a0fSBarry Smith     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
102673a71a0fSBarry Smith   }
10278c778c55SBarry Smith   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
102873a71a0fSBarry Smith   PetscFunctionReturn(0);
102973a71a0fSBarry Smith }
103073a71a0fSBarry Smith 
1031fd4e9aacSBarry Smith extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1032fd4e9aacSBarry Smith 
10333b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
10343b49f96aSBarry Smith {
10353b49f96aSBarry Smith   PetscFunctionBegin;
10363b49f96aSBarry Smith   *missing = PETSC_FALSE;
10373b49f96aSBarry Smith   PetscFunctionReturn(0);
10383b49f96aSBarry Smith }
10393b49f96aSBarry Smith 
10408965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
104109dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
104209dc0095SBarry Smith                                         MatGetRow_MPIDense,
104309dc0095SBarry Smith                                         MatRestoreRow_MPIDense,
104409dc0095SBarry Smith                                         MatMult_MPIDense,
104597304618SKris Buschelman                                 /*  4*/ MatMultAdd_MPIDense,
10467c922b88SBarry Smith                                         MatMultTranspose_MPIDense,
10477c922b88SBarry Smith                                         MatMultTransposeAdd_MPIDense,
10488965ea79SLois Curfman McInnes                                         0,
104909dc0095SBarry Smith                                         0,
105009dc0095SBarry Smith                                         0,
105197304618SKris Buschelman                                 /* 10*/ 0,
105209dc0095SBarry Smith                                         0,
105309dc0095SBarry Smith                                         0,
105409dc0095SBarry Smith                                         0,
105509dc0095SBarry Smith                                         MatTranspose_MPIDense,
105697304618SKris Buschelman                                 /* 15*/ MatGetInfo_MPIDense,
10576e4ee0c6SHong Zhang                                         MatEqual_MPIDense,
105809dc0095SBarry Smith                                         MatGetDiagonal_MPIDense,
10595b2fa520SLois Curfman McInnes                                         MatDiagonalScale_MPIDense,
106009dc0095SBarry Smith                                         MatNorm_MPIDense,
106197304618SKris Buschelman                                 /* 20*/ MatAssemblyBegin_MPIDense,
106209dc0095SBarry Smith                                         MatAssemblyEnd_MPIDense,
106309dc0095SBarry Smith                                         MatSetOption_MPIDense,
106409dc0095SBarry Smith                                         MatZeroEntries_MPIDense,
1065d519adbfSMatthew Knepley                                 /* 24*/ MatZeroRows_MPIDense,
1066919b68f7SBarry Smith                                         0,
106701b82886SBarry Smith                                         0,
106801b82886SBarry Smith                                         0,
106901b82886SBarry Smith                                         0,
10704994cf47SJed Brown                                 /* 29*/ MatSetUp_MPIDense,
1071273d9f13SBarry Smith                                         0,
107209dc0095SBarry Smith                                         0,
1073c56a70eeSBarry Smith                                         MatGetDiagonalBlock_MPIDense,
10748c778c55SBarry Smith                                         0,
1075d519adbfSMatthew Knepley                                 /* 34*/ MatDuplicate_MPIDense,
107609dc0095SBarry Smith                                         0,
107709dc0095SBarry Smith                                         0,
107809dc0095SBarry Smith                                         0,
107909dc0095SBarry Smith                                         0,
1080d519adbfSMatthew Knepley                                 /* 39*/ MatAXPY_MPIDense,
1081*7dae84e0SHong Zhang                                         MatCreateSubMatrices_MPIDense,
108209dc0095SBarry Smith                                         0,
108309dc0095SBarry Smith                                         MatGetValues_MPIDense,
108409dc0095SBarry Smith                                         0,
1085d519adbfSMatthew Knepley                                 /* 44*/ 0,
108609dc0095SBarry Smith                                         MatScale_MPIDense,
10877d68702bSBarry Smith                                         MatShift_Basic,
108809dc0095SBarry Smith                                         0,
108909dc0095SBarry Smith                                         0,
109073a71a0fSBarry Smith                                 /* 49*/ MatSetRandom_MPIDense,
109109dc0095SBarry Smith                                         0,
109209dc0095SBarry Smith                                         0,
109309dc0095SBarry Smith                                         0,
109409dc0095SBarry Smith                                         0,
1095d519adbfSMatthew Knepley                                 /* 54*/ 0,
109609dc0095SBarry Smith                                         0,
109709dc0095SBarry Smith                                         0,
109809dc0095SBarry Smith                                         0,
109909dc0095SBarry Smith                                         0,
1100*7dae84e0SHong Zhang                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1101b9b97703SBarry Smith                                         MatDestroy_MPIDense,
1102b9b97703SBarry Smith                                         MatView_MPIDense,
1103357abbc8SBarry Smith                                         0,
110497304618SKris Buschelman                                         0,
1105d519adbfSMatthew Knepley                                 /* 64*/ 0,
110697304618SKris Buschelman                                         0,
110797304618SKris Buschelman                                         0,
110897304618SKris Buschelman                                         0,
110997304618SKris Buschelman                                         0,
1110d519adbfSMatthew Knepley                                 /* 69*/ 0,
111197304618SKris Buschelman                                         0,
111297304618SKris Buschelman                                         0,
111397304618SKris Buschelman                                         0,
111497304618SKris Buschelman                                         0,
1115d519adbfSMatthew Knepley                                 /* 74*/ 0,
111697304618SKris Buschelman                                         0,
111797304618SKris Buschelman                                         0,
111897304618SKris Buschelman                                         0,
111997304618SKris Buschelman                                         0,
1120d519adbfSMatthew Knepley                                 /* 79*/ 0,
112197304618SKris Buschelman                                         0,
112297304618SKris Buschelman                                         0,
112397304618SKris Buschelman                                         0,
11245bba2384SShri Abhyankar                                 /* 83*/ MatLoad_MPIDense,
1125865e5f61SKris Buschelman                                         0,
1126865e5f61SKris Buschelman                                         0,
1127865e5f61SKris Buschelman                                         0,
1128865e5f61SKris Buschelman                                         0,
1129865e5f61SKris Buschelman                                         0,
11309775878aSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1131320f2790SHong Zhang                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1132320f2790SHong Zhang                                         MatMatMultSymbolic_MPIDense_MPIDense,
11339775878aSHong Zhang #else
11349775878aSHong Zhang                                 /* 89*/ 0,
1135865e5f61SKris Buschelman                                         0,
11369775878aSHong Zhang #endif
1137fd4e9aacSBarry Smith                                         MatMatMultNumeric_MPIDense,
11382fbe02b9SBarry Smith                                         0,
1139ba337c44SJed Brown                                         0,
1140d519adbfSMatthew Knepley                                 /* 94*/ 0,
1141865e5f61SKris Buschelman                                         0,
1142865e5f61SKris Buschelman                                         0,
1143ba337c44SJed Brown                                         0,
1144ba337c44SJed Brown                                         0,
1145ba337c44SJed Brown                                 /* 99*/ 0,
1146ba337c44SJed Brown                                         0,
1147ba337c44SJed Brown                                         0,
1148ba337c44SJed Brown                                         MatConjugate_MPIDense,
1149ba337c44SJed Brown                                         0,
1150ba337c44SJed Brown                                 /*104*/ 0,
1151ba337c44SJed Brown                                         MatRealPart_MPIDense,
1152ba337c44SJed Brown                                         MatImaginaryPart_MPIDense,
115386d161a7SShri Abhyankar                                         0,
115486d161a7SShri Abhyankar                                         0,
115586d161a7SShri Abhyankar                                 /*109*/ 0,
115686d161a7SShri Abhyankar                                         0,
115786d161a7SShri Abhyankar                                         0,
115886d161a7SShri Abhyankar                                         0,
11593b49f96aSBarry Smith                                         MatMissingDiagonal_MPIDense,
116086d161a7SShri Abhyankar                                 /*114*/ 0,
116186d161a7SShri Abhyankar                                         0,
116286d161a7SShri Abhyankar                                         0,
116386d161a7SShri Abhyankar                                         0,
116486d161a7SShri Abhyankar                                         0,
116586d161a7SShri Abhyankar                                 /*119*/ 0,
116686d161a7SShri Abhyankar                                         0,
116786d161a7SShri Abhyankar                                         0,
11680716a85fSBarry Smith                                         0,
11690716a85fSBarry Smith                                         0,
11700716a85fSBarry Smith                                 /*124*/ 0,
11713964eb88SJed Brown                                         MatGetColumnNorms_MPIDense,
11723964eb88SJed Brown                                         0,
11733964eb88SJed Brown                                         0,
11743964eb88SJed Brown                                         0,
11753964eb88SJed Brown                                 /*129*/ 0,
1176cb20be35SHong Zhang                                         MatTransposeMatMult_MPIDense_MPIDense,
1177cb20be35SHong Zhang                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1178cb20be35SHong Zhang                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
11793964eb88SJed Brown                                         0,
11803964eb88SJed Brown                                 /*134*/ 0,
11813964eb88SJed Brown                                         0,
11823964eb88SJed Brown                                         0,
11833964eb88SJed Brown                                         0,
11843964eb88SJed Brown                                         0,
11853964eb88SJed Brown                                 /*139*/ 0,
1186f9426fe0SMark Adams                                         0,
11873964eb88SJed Brown                                         0
1188ba337c44SJed Brown };
11898965ea79SLois Curfman McInnes 
11907087cfbeSBarry Smith PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1191a23d5eceSKris Buschelman {
1192a23d5eceSKris Buschelman   Mat_MPIDense   *a;
1193dfbe8321SBarry Smith   PetscErrorCode ierr;
1194a23d5eceSKris Buschelman 
1195a23d5eceSKris Buschelman   PetscFunctionBegin;
1196a23d5eceSKris Buschelman   mat->preallocated = PETSC_TRUE;
1197a23d5eceSKris Buschelman   /* Note:  For now, when data is specified above, this assumes the user correctly
1198a23d5eceSKris Buschelman    allocates the local dense storage space.  We should add error checking. */
1199a23d5eceSKris Buschelman 
1200a23d5eceSKris Buschelman   a       = (Mat_MPIDense*)mat->data;
120134ef9618SShri Abhyankar   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
120234ef9618SShri Abhyankar   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
120334ef9618SShri Abhyankar   a->nvec = mat->cmap->n;
120434ef9618SShri Abhyankar 
1205f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1206d0f46423SBarry Smith   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
12075c5985e7SKris Buschelman   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
12085c5985e7SKris Buschelman   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
12093bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1210a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1211a23d5eceSKris Buschelman }
1212a23d5eceSKris Buschelman 
121365b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1214cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
12158baccfbdSHong Zhang {
12168ea901baSHong Zhang   Mat            mat_elemental;
12178ea901baSHong Zhang   PetscErrorCode ierr;
121832d7a744SHong Zhang   PetscScalar    *v;
121932d7a744SHong Zhang   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
12208ea901baSHong Zhang 
12218baccfbdSHong Zhang   PetscFunctionBegin;
1222378336b6SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
1223378336b6SHong Zhang     mat_elemental = *newmat;
1224378336b6SHong Zhang     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1225378336b6SHong Zhang   } else {
1226378336b6SHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1227378336b6SHong Zhang     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1228378336b6SHong Zhang     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1229378336b6SHong Zhang     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
123032d7a744SHong Zhang     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1231378336b6SHong Zhang   }
1232378336b6SHong Zhang 
123332d7a744SHong Zhang   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
123432d7a744SHong Zhang   for (i=0; i<N; i++) cols[i] = i;
123532d7a744SHong Zhang   for (i=0; i<m; i++) rows[i] = rstart + i;
12368ea901baSHong Zhang 
12378ea901baSHong Zhang   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
123832d7a744SHong Zhang   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
123932d7a744SHong Zhang   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
12408ea901baSHong Zhang   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
12418ea901baSHong Zhang   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124232d7a744SHong Zhang   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
124332d7a744SHong Zhang   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
12448ea901baSHong Zhang 
1245511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
124628be2f97SBarry Smith     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
12478ea901baSHong Zhang   } else {
12488ea901baSHong Zhang     *newmat = mat_elemental;
12498ea901baSHong Zhang   }
12508baccfbdSHong Zhang   PetscFunctionReturn(0);
12518baccfbdSHong Zhang }
125265b80a83SHong Zhang #endif
12538baccfbdSHong Zhang 
12548cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1255273d9f13SBarry Smith {
1256273d9f13SBarry Smith   Mat_MPIDense   *a;
1257dfbe8321SBarry Smith   PetscErrorCode ierr;
1258273d9f13SBarry Smith 
1259273d9f13SBarry Smith   PetscFunctionBegin;
1260b00a9115SJed Brown   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1261b0a32e0cSBarry Smith   mat->data = (void*)a;
1262273d9f13SBarry Smith   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1263273d9f13SBarry Smith 
1264273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
1265ce94432eSBarry Smith   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1266ce94432eSBarry Smith   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1267273d9f13SBarry Smith 
1268273d9f13SBarry Smith   /* build cache for off array entries formed */
1269273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
12702205254eSKarl Rupp 
1271ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1272273d9f13SBarry Smith 
1273273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1274273d9f13SBarry Smith   a->lvec        = 0;
1275273d9f13SBarry Smith   a->Mvctx       = 0;
1276273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1277273d9f13SBarry Smith 
1278bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1279bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
12808baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
12818baccfbdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
12828baccfbdSHong Zhang #endif
1283bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1284bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1285bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1286bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
12878949adfdSHong Zhang 
12888949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
12898949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
12908949adfdSHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
129138aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1292273d9f13SBarry Smith   PetscFunctionReturn(0);
1293273d9f13SBarry Smith }
1294273d9f13SBarry Smith 
1295209238afSKris Buschelman /*MC
1296002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1297209238afSKris Buschelman 
1298209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1299209238afSKris Buschelman    and MATMPIDENSE otherwise.
1300209238afSKris Buschelman 
1301209238afSKris Buschelman    Options Database Keys:
1302209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1303209238afSKris Buschelman 
1304209238afSKris Buschelman   Level: beginner
1305209238afSKris Buschelman 
130601b82886SBarry Smith 
1307209238afSKris Buschelman .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1308209238afSKris Buschelman M*/
1309209238afSKris Buschelman 
1310273d9f13SBarry Smith /*@C
1311273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1312273d9f13SBarry Smith 
1313273d9f13SBarry Smith    Not collective
1314273d9f13SBarry Smith 
1315273d9f13SBarry Smith    Input Parameters:
13161c4f3114SJed Brown .  B - the matrix
13170298fd71SBarry Smith -  data - optional location of matrix data.  Set data=NULL for PETSc
1318273d9f13SBarry Smith    to control all matrix memory allocation.
1319273d9f13SBarry Smith 
1320273d9f13SBarry Smith    Notes:
1321273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1322273d9f13SBarry Smith    storage by columns.
1323273d9f13SBarry Smith 
1324273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1325273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
13260298fd71SBarry Smith    set data=NULL.
1327273d9f13SBarry Smith 
1328273d9f13SBarry Smith    Level: intermediate
1329273d9f13SBarry Smith 
1330273d9f13SBarry Smith .keywords: matrix,dense, parallel
1331273d9f13SBarry Smith 
1332273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1333273d9f13SBarry Smith @*/
13341c4f3114SJed Brown PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1335273d9f13SBarry Smith {
13364ac538c5SBarry Smith   PetscErrorCode ierr;
1337273d9f13SBarry Smith 
1338273d9f13SBarry Smith   PetscFunctionBegin;
13391c4f3114SJed Brown   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1340273d9f13SBarry Smith   PetscFunctionReturn(0);
1341273d9f13SBarry Smith }
1342273d9f13SBarry Smith 
13438965ea79SLois Curfman McInnes /*@C
134469b1f4b7SBarry Smith    MatCreateDense - Creates a parallel matrix in dense format.
13458965ea79SLois Curfman McInnes 
1346db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1347db81eaa0SLois Curfman McInnes 
13488965ea79SLois Curfman McInnes    Input Parameters:
1349db81eaa0SLois Curfman McInnes +  comm - MPI communicator
13508965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1351db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
13528965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1353db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
13546cfe35ddSJose E. Roman -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1355dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
13568965ea79SLois Curfman McInnes 
13578965ea79SLois Curfman McInnes    Output Parameter:
1358477f1c0bSLois Curfman McInnes .  A - the matrix
13598965ea79SLois Curfman McInnes 
1360b259b22eSLois Curfman McInnes    Notes:
136139ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
136239ddd567SLois Curfman McInnes    storage by columns.
13638965ea79SLois Curfman McInnes 
136418f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
136518f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
13666cfe35ddSJose E. Roman    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
136718f449edSLois Curfman McInnes 
13688965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
13698965ea79SLois Curfman McInnes    (possibly both).
13708965ea79SLois Curfman McInnes 
1371027ccd11SLois Curfman McInnes    Level: intermediate
1372027ccd11SLois Curfman McInnes 
137339ddd567SLois Curfman McInnes .keywords: matrix,dense, parallel
13748965ea79SLois Curfman McInnes 
137539ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
13768965ea79SLois Curfman McInnes @*/
137769b1f4b7SBarry Smith PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
13788965ea79SLois Curfman McInnes {
13796849ba73SBarry Smith   PetscErrorCode ierr;
138013f74950SBarry Smith   PetscMPIInt    size;
13818965ea79SLois Curfman McInnes 
13823a40ed3dSBarry Smith   PetscFunctionBegin;
1383f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1384f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1385273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1386273d9f13SBarry Smith   if (size > 1) {
1387273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1388273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
13896cfe35ddSJose E. Roman     if (data) {  /* user provided data array, so no need to assemble */
13906cfe35ddSJose E. Roman       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
13916cfe35ddSJose E. Roman       (*A)->assembled = PETSC_TRUE;
13926cfe35ddSJose E. Roman     }
1393273d9f13SBarry Smith   } else {
1394273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1395273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
13968c469469SLois Curfman McInnes   }
13973a40ed3dSBarry Smith   PetscFunctionReturn(0);
13988965ea79SLois Curfman McInnes }
13998965ea79SLois Curfman McInnes 
14006849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
14018965ea79SLois Curfman McInnes {
14028965ea79SLois Curfman McInnes   Mat            mat;
14033501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1404dfbe8321SBarry Smith   PetscErrorCode ierr;
14058965ea79SLois Curfman McInnes 
14063a40ed3dSBarry Smith   PetscFunctionBegin;
14078965ea79SLois Curfman McInnes   *newmat = 0;
1408ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1409d0f46423SBarry Smith   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
14107adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1411834f8fabSBarry Smith   a       = (Mat_MPIDense*)mat->data;
1412e04c1aa4SHong Zhang   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
14135aa7edbeSHong Zhang 
1414d5f3da31SBarry Smith   mat->factortype   = A->factortype;
1415c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1416273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
14178965ea79SLois Curfman McInnes 
14188965ea79SLois Curfman McInnes   a->size         = oldmat->size;
14198965ea79SLois Curfman McInnes   a->rank         = oldmat->rank;
1420e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
1421b9b97703SBarry Smith   a->nvec         = oldmat->nvec;
14223782ba37SSatish Balay   a->donotstash   = oldmat->donotstash;
1423e04c1aa4SHong Zhang 
14241e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
14251e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
14268965ea79SLois Curfman McInnes 
1427329f5518SBarry Smith   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
14285609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
14293bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
143001b82886SBarry Smith 
14318965ea79SLois Curfman McInnes   *newmat = mat;
14323a40ed3dSBarry Smith   PetscFunctionReturn(0);
14338965ea79SLois Curfman McInnes }
14348965ea79SLois Curfman McInnes 
14359d36ed5fSBarry Smith PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
143686d161a7SShri Abhyankar {
143786d161a7SShri Abhyankar   PetscErrorCode ierr;
143886d161a7SShri Abhyankar   PetscMPIInt    rank,size;
143974c13d95SBarry Smith   const PetscInt *rowners;
144074c13d95SBarry Smith   PetscInt       i,m,n,nz,j,mMax;
144186d161a7SShri Abhyankar   PetscScalar    *array,*vals,*vals_ptr;
14429d36ed5fSBarry Smith   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
144386d161a7SShri Abhyankar 
144486d161a7SShri Abhyankar   PetscFunctionBegin;
144586d161a7SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
144686d161a7SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
144786d161a7SShri Abhyankar 
144874c13d95SBarry Smith   /* determine ownership of rows and columns */
144974c13d95SBarry Smith   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
145074c13d95SBarry Smith   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
145186d161a7SShri Abhyankar 
145274c13d95SBarry Smith   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
14539d36ed5fSBarry Smith   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
14540298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
14559d36ed5fSBarry Smith   }
14568c778c55SBarry Smith   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
145749467e7dSBarry Smith   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
145874c13d95SBarry Smith   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1459396e826eSBarry Smith   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
146086d161a7SShri Abhyankar   if (!rank) {
14619d36ed5fSBarry Smith     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
146286d161a7SShri Abhyankar 
146386d161a7SShri Abhyankar     /* read in my part of the matrix numerical values  */
146486d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
146586d161a7SShri Abhyankar 
146686d161a7SShri Abhyankar     /* insert into matrix-by row (this is why cannot directly read into array */
146786d161a7SShri Abhyankar     vals_ptr = vals;
146886d161a7SShri Abhyankar     for (i=0; i<m; i++) {
146986d161a7SShri Abhyankar       for (j=0; j<N; j++) {
147086d161a7SShri Abhyankar         array[i + j*m] = *vals_ptr++;
147186d161a7SShri Abhyankar       }
147286d161a7SShri Abhyankar     }
147386d161a7SShri Abhyankar 
147486d161a7SShri Abhyankar     /* read in other processors and ship out */
147586d161a7SShri Abhyankar     for (i=1; i<size; i++) {
147686d161a7SShri Abhyankar       nz   = (rowners[i+1] - rowners[i])*N;
147786d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1478a25532f0SBarry Smith       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
147986d161a7SShri Abhyankar     }
148086d161a7SShri Abhyankar   } else {
148186d161a7SShri Abhyankar     /* receive numeric values */
1482785e854fSJed Brown     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
148386d161a7SShri Abhyankar 
148486d161a7SShri Abhyankar     /* receive message of values*/
1485a25532f0SBarry Smith     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
148686d161a7SShri Abhyankar 
148786d161a7SShri Abhyankar     /* insert into matrix-by row (this is why cannot directly read into array */
148886d161a7SShri Abhyankar     vals_ptr = vals;
148986d161a7SShri Abhyankar     for (i=0; i<m; i++) {
149086d161a7SShri Abhyankar       for (j=0; j<N; j++) {
149186d161a7SShri Abhyankar         array[i + j*m] = *vals_ptr++;
149286d161a7SShri Abhyankar       }
149386d161a7SShri Abhyankar     }
149486d161a7SShri Abhyankar   }
14958c778c55SBarry Smith   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
149686d161a7SShri Abhyankar   ierr = PetscFree(vals);CHKERRQ(ierr);
149786d161a7SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
149886d161a7SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
149986d161a7SShri Abhyankar   PetscFunctionReturn(0);
150086d161a7SShri Abhyankar }
150186d161a7SShri Abhyankar 
1502112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
150386d161a7SShri Abhyankar {
1504dfdea288SBarry Smith   Mat_MPIDense   *a;
150586d161a7SShri Abhyankar   PetscScalar    *vals,*svals;
1506ce94432eSBarry Smith   MPI_Comm       comm;
150786d161a7SShri Abhyankar   MPI_Status     status;
150849467e7dSBarry Smith   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
150986d161a7SShri Abhyankar   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
151049467e7dSBarry Smith   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
15119d36ed5fSBarry Smith   PetscInt       i,nz,j,rstart,rend;
151286d161a7SShri Abhyankar   int            fd;
151386d161a7SShri Abhyankar   PetscErrorCode ierr;
151486d161a7SShri Abhyankar 
151586d161a7SShri Abhyankar   PetscFunctionBegin;
1516c98fd787SBarry Smith   /* force binary viewer to load .info file if it has not yet done so */
1517c98fd787SBarry Smith   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1518ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
151986d161a7SShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
152086d161a7SShri Abhyankar   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
152186d161a7SShri Abhyankar   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
15225872f025SBarry Smith   if (!rank) {
152386d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
152486d161a7SShri Abhyankar     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
152586d161a7SShri Abhyankar   }
152686d161a7SShri Abhyankar   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
152786d161a7SShri Abhyankar   M    = header[1]; N = header[2]; nz = header[3];
152886d161a7SShri Abhyankar 
152986d161a7SShri Abhyankar   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
15309d36ed5fSBarry Smith   if (newmat->rmap->N < 0) newmat->rmap->N = M;
15319d36ed5fSBarry Smith   if (newmat->cmap->N < 0) newmat->cmap->N = N;
153286d161a7SShri Abhyankar 
15339d36ed5fSBarry Smith   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
15349d36ed5fSBarry Smith   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
153586d161a7SShri Abhyankar 
153686d161a7SShri Abhyankar   /*
153786d161a7SShri Abhyankar        Handle case where matrix is stored on disk as a dense matrix
153886d161a7SShri Abhyankar   */
153986d161a7SShri Abhyankar   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
15409d36ed5fSBarry Smith     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
154186d161a7SShri Abhyankar     PetscFunctionReturn(0);
154286d161a7SShri Abhyankar   }
154386d161a7SShri Abhyankar 
154486d161a7SShri Abhyankar   /* determine ownership of all rows */
15452205254eSKarl Rupp   if (newmat->rmap->n < 0) {
15462205254eSKarl Rupp     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
15472205254eSKarl Rupp   } else {
15482205254eSKarl Rupp     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
15492205254eSKarl Rupp   }
155049467e7dSBarry Smith   if (newmat->cmap->n < 0) {
155149467e7dSBarry Smith     n = PETSC_DECIDE;
155249467e7dSBarry Smith   } else {
155349467e7dSBarry Smith     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
155449467e7dSBarry Smith   }
155549467e7dSBarry Smith 
1556854ce69bSBarry Smith   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
155786d161a7SShri Abhyankar   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
155886d161a7SShri Abhyankar   rowners[0] = 0;
155986d161a7SShri Abhyankar   for (i=2; i<=size; i++) {
156086d161a7SShri Abhyankar     rowners[i] += rowners[i-1];
156186d161a7SShri Abhyankar   }
156286d161a7SShri Abhyankar   rstart = rowners[rank];
156386d161a7SShri Abhyankar   rend   = rowners[rank+1];
156486d161a7SShri Abhyankar 
156586d161a7SShri Abhyankar   /* distribute row lengths to all processors */
156649467e7dSBarry Smith   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
156786d161a7SShri Abhyankar   if (!rank) {
1568785e854fSJed Brown     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
156986d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1570785e854fSJed Brown     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
157186d161a7SShri Abhyankar     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
157286d161a7SShri Abhyankar     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
157386d161a7SShri Abhyankar     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
157486d161a7SShri Abhyankar   } else {
157586d161a7SShri Abhyankar     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
157686d161a7SShri Abhyankar   }
157786d161a7SShri Abhyankar 
157886d161a7SShri Abhyankar   if (!rank) {
157986d161a7SShri Abhyankar     /* calculate the number of nonzeros on each processor */
1580785e854fSJed Brown     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
158186d161a7SShri Abhyankar     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
158286d161a7SShri Abhyankar     for (i=0; i<size; i++) {
158386d161a7SShri Abhyankar       for (j=rowners[i]; j< rowners[i+1]; j++) {
158486d161a7SShri Abhyankar         procsnz[i] += rowlengths[j];
158586d161a7SShri Abhyankar       }
158686d161a7SShri Abhyankar     }
158786d161a7SShri Abhyankar     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
158886d161a7SShri Abhyankar 
158986d161a7SShri Abhyankar     /* determine max buffer needed and allocate it */
159086d161a7SShri Abhyankar     maxnz = 0;
159186d161a7SShri Abhyankar     for (i=0; i<size; i++) {
159286d161a7SShri Abhyankar       maxnz = PetscMax(maxnz,procsnz[i]);
159386d161a7SShri Abhyankar     }
1594785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
159586d161a7SShri Abhyankar 
159686d161a7SShri Abhyankar     /* read in my part of the matrix column indices  */
159786d161a7SShri Abhyankar     nz   = procsnz[0];
1598785e854fSJed Brown     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
159986d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
160086d161a7SShri Abhyankar 
160186d161a7SShri Abhyankar     /* read in every one elses and ship off */
160286d161a7SShri Abhyankar     for (i=1; i<size; i++) {
160386d161a7SShri Abhyankar       nz   = procsnz[i];
160486d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
160586d161a7SShri Abhyankar       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
160686d161a7SShri Abhyankar     }
160786d161a7SShri Abhyankar     ierr = PetscFree(cols);CHKERRQ(ierr);
160886d161a7SShri Abhyankar   } else {
160986d161a7SShri Abhyankar     /* determine buffer space needed for message */
161086d161a7SShri Abhyankar     nz = 0;
161186d161a7SShri Abhyankar     for (i=0; i<m; i++) {
161286d161a7SShri Abhyankar       nz += ourlens[i];
161386d161a7SShri Abhyankar     }
1614854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
161586d161a7SShri Abhyankar 
161686d161a7SShri Abhyankar     /* receive message of column indices*/
161786d161a7SShri Abhyankar     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
161886d161a7SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
161986d161a7SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
162086d161a7SShri Abhyankar   }
162186d161a7SShri Abhyankar 
162249467e7dSBarry Smith   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1623dfdea288SBarry Smith   a = (Mat_MPIDense*)newmat->data;
16242e3566b0SBarry Smith   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
16250298fd71SBarry Smith     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1626dfdea288SBarry Smith   }
162786d161a7SShri Abhyankar 
162886d161a7SShri Abhyankar   if (!rank) {
1629785e854fSJed Brown     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
163086d161a7SShri Abhyankar 
163186d161a7SShri Abhyankar     /* read in my part of the matrix numerical values  */
163286d161a7SShri Abhyankar     nz   = procsnz[0];
163386d161a7SShri Abhyankar     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
163486d161a7SShri Abhyankar 
163586d161a7SShri Abhyankar     /* insert into matrix */
163686d161a7SShri Abhyankar     jj      = rstart;
163786d161a7SShri Abhyankar     smycols = mycols;
163886d161a7SShri Abhyankar     svals   = vals;
163986d161a7SShri Abhyankar     for (i=0; i<m; i++) {
164086d161a7SShri Abhyankar       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
164186d161a7SShri Abhyankar       smycols += ourlens[i];
164286d161a7SShri Abhyankar       svals   += ourlens[i];
164386d161a7SShri Abhyankar       jj++;
164486d161a7SShri Abhyankar     }
164586d161a7SShri Abhyankar 
164686d161a7SShri Abhyankar     /* read in other processors and ship out */
164786d161a7SShri Abhyankar     for (i=1; i<size; i++) {
164886d161a7SShri Abhyankar       nz   = procsnz[i];
164986d161a7SShri Abhyankar       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
165086d161a7SShri Abhyankar       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
165186d161a7SShri Abhyankar     }
165286d161a7SShri Abhyankar     ierr = PetscFree(procsnz);CHKERRQ(ierr);
165386d161a7SShri Abhyankar   } else {
165486d161a7SShri Abhyankar     /* receive numeric values */
1655854ce69bSBarry Smith     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
165686d161a7SShri Abhyankar 
165786d161a7SShri Abhyankar     /* receive message of values*/
165886d161a7SShri Abhyankar     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
165986d161a7SShri Abhyankar     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
166086d161a7SShri Abhyankar     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
166186d161a7SShri Abhyankar 
166286d161a7SShri Abhyankar     /* insert into matrix */
166386d161a7SShri Abhyankar     jj      = rstart;
166486d161a7SShri Abhyankar     smycols = mycols;
166586d161a7SShri Abhyankar     svals   = vals;
166686d161a7SShri Abhyankar     for (i=0; i<m; i++) {
166786d161a7SShri Abhyankar       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
166886d161a7SShri Abhyankar       smycols += ourlens[i];
166986d161a7SShri Abhyankar       svals   += ourlens[i];
167086d161a7SShri Abhyankar       jj++;
167186d161a7SShri Abhyankar     }
167286d161a7SShri Abhyankar   }
167349467e7dSBarry Smith   ierr = PetscFree(ourlens);CHKERRQ(ierr);
167486d161a7SShri Abhyankar   ierr = PetscFree(vals);CHKERRQ(ierr);
167586d161a7SShri Abhyankar   ierr = PetscFree(mycols);CHKERRQ(ierr);
167686d161a7SShri Abhyankar   ierr = PetscFree(rowners);CHKERRQ(ierr);
167786d161a7SShri Abhyankar 
167886d161a7SShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
167986d161a7SShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
168086d161a7SShri Abhyankar   PetscFunctionReturn(0);
168186d161a7SShri Abhyankar }
168286d161a7SShri Abhyankar 
1683ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
16846e4ee0c6SHong Zhang {
16856e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
16866e4ee0c6SHong Zhang   Mat            a,b;
1687ace3abfcSBarry Smith   PetscBool      flg;
16886e4ee0c6SHong Zhang   PetscErrorCode ierr;
168990ace30eSBarry Smith 
16906e4ee0c6SHong Zhang   PetscFunctionBegin;
16916e4ee0c6SHong Zhang   a    = matA->A;
16926e4ee0c6SHong Zhang   b    = matB->A;
16936e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1694b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
16956e4ee0c6SHong Zhang   PetscFunctionReturn(0);
16966e4ee0c6SHong Zhang }
169790ace30eSBarry Smith 
1698baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1699baa3c1c6SHong Zhang {
1700baa3c1c6SHong Zhang   PetscErrorCode        ierr;
1701baa3c1c6SHong Zhang   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1702baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = a->atbdense;
1703baa3c1c6SHong Zhang 
1704baa3c1c6SHong Zhang   PetscFunctionBegin;
1705c5ef1628SHong Zhang   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1706baa3c1c6SHong Zhang   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1707baa3c1c6SHong Zhang   ierr = PetscFree(atb);CHKERRQ(ierr);
1708baa3c1c6SHong Zhang   PetscFunctionReturn(0);
1709baa3c1c6SHong Zhang }
1710baa3c1c6SHong Zhang 
1711cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1712cb20be35SHong Zhang {
1713baa3c1c6SHong Zhang   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1714660d5466SHong Zhang   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1715baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb = c->atbdense;
1716cb20be35SHong Zhang   PetscErrorCode ierr;
1717cb20be35SHong Zhang   MPI_Comm       comm;
1718d5017740SHong Zhang   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1719c5ef1628SHong Zhang   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1720d5017740SHong Zhang   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1721660d5466SHong Zhang   PetscScalar    _DOne=1.0,_DZero=0.0;
1722adc7a786SHong Zhang   PetscBLASInt   am,an,bn,aN;
1723e68c0b26SHong Zhang   const PetscInt *ranges;
1724cb20be35SHong Zhang 
1725cb20be35SHong Zhang   PetscFunctionBegin;
1726cb20be35SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1727cb20be35SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1728cb20be35SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1729e68c0b26SHong Zhang 
1730c5ef1628SHong Zhang   /* compute atbarray = aseq^T * bseq */
1731660d5466SHong Zhang   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1732660d5466SHong Zhang   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1733660d5466SHong Zhang   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1734adc7a786SHong Zhang   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1735adc7a786SHong Zhang   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1736cb20be35SHong Zhang 
1737cb20be35SHong Zhang   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1738c5ef1628SHong Zhang   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1739cb20be35SHong Zhang 
1740660d5466SHong Zhang   /* arrange atbarray into sendbuf */
1741cb20be35SHong Zhang   k = 0;
1742cb20be35SHong Zhang   for (proc=0; proc<size; proc++) {
1743baa3c1c6SHong Zhang     for (j=0; j<cN; j++) {
1744c5ef1628SHong Zhang       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1745cb20be35SHong Zhang     }
1746cb20be35SHong Zhang   }
1747c5ef1628SHong Zhang   /* sum all atbarray to local values of C */
1748660d5466SHong Zhang   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
17493462b7efSHong Zhang   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1750660d5466SHong Zhang   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1751cb20be35SHong Zhang   PetscFunctionReturn(0);
1752cb20be35SHong Zhang }
1753cb20be35SHong Zhang 
1754cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1755cb20be35SHong Zhang {
1756cb20be35SHong Zhang   PetscErrorCode        ierr;
1757baa3c1c6SHong Zhang   Mat                   Cdense;
1758cb20be35SHong Zhang   MPI_Comm              comm;
1759baa3c1c6SHong Zhang   PetscMPIInt           size;
1760660d5466SHong Zhang   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1761baa3c1c6SHong Zhang   Mat_MPIDense          *c;
1762baa3c1c6SHong Zhang   Mat_TransMatMultDense *atb;
1763cb20be35SHong Zhang 
1764cb20be35SHong Zhang   PetscFunctionBegin;
1765baa3c1c6SHong Zhang   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1766cb20be35SHong Zhang   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1767cb20be35SHong Zhang     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1768cb20be35SHong Zhang   }
1769cb20be35SHong Zhang 
1770baa3c1c6SHong Zhang   /* create matrix product Cdense */
1771baa3c1c6SHong Zhang   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1772660d5466SHong Zhang   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1773baa3c1c6SHong Zhang   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1774baa3c1c6SHong Zhang   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1775baa3c1c6SHong Zhang   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776baa3c1c6SHong Zhang   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1777baa3c1c6SHong Zhang   *C   = Cdense;
1778baa3c1c6SHong Zhang 
1779baa3c1c6SHong Zhang   /* create data structure for reuse Cdense */
1780baa3c1c6SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1781baa3c1c6SHong Zhang   ierr = PetscNew(&atb);CHKERRQ(ierr);
1782660d5466SHong Zhang   cM = Cdense->rmap->N;
1783c5ef1628SHong Zhang   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1784baa3c1c6SHong Zhang 
1785baa3c1c6SHong Zhang   c                    = (Mat_MPIDense*)Cdense->data;
1786baa3c1c6SHong Zhang   c->atbdense          = atb;
1787baa3c1c6SHong Zhang   atb->destroy         = Cdense->ops->destroy;
1788baa3c1c6SHong Zhang   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1789cb20be35SHong Zhang   PetscFunctionReturn(0);
1790cb20be35SHong Zhang }
1791cb20be35SHong Zhang 
1792cb20be35SHong Zhang PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1793cb20be35SHong Zhang {
1794cb20be35SHong Zhang   PetscErrorCode ierr;
1795cb20be35SHong Zhang 
1796cb20be35SHong Zhang   PetscFunctionBegin;
1797cb20be35SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
1798cb20be35SHong Zhang     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1799cb20be35SHong Zhang   }
1800cb20be35SHong Zhang   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1801cb20be35SHong Zhang   PetscFunctionReturn(0);
1802cb20be35SHong Zhang }
1803320f2790SHong Zhang 
1804320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1805320f2790SHong Zhang {
1806320f2790SHong Zhang   PetscErrorCode   ierr;
1807320f2790SHong Zhang   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1808320f2790SHong Zhang   Mat_MatMultDense *ab = a->abdense;
1809320f2790SHong Zhang 
1810320f2790SHong Zhang   PetscFunctionBegin;
1811320f2790SHong Zhang   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1812320f2790SHong Zhang   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1813320f2790SHong Zhang   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1814320f2790SHong Zhang 
1815320f2790SHong Zhang   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1816320f2790SHong Zhang   ierr = PetscFree(ab);CHKERRQ(ierr);
1817320f2790SHong Zhang   PetscFunctionReturn(0);
1818320f2790SHong Zhang }
1819320f2790SHong Zhang 
18205242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
1821320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1822320f2790SHong Zhang {
1823320f2790SHong Zhang   PetscErrorCode   ierr;
1824320f2790SHong Zhang   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1825320f2790SHong Zhang   Mat_MatMultDense *ab=c->abdense;
1826320f2790SHong Zhang 
1827320f2790SHong Zhang   PetscFunctionBegin;
1828de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1829de0a22f0SHong Zhang   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1830320f2790SHong Zhang   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1831de0a22f0SHong Zhang   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1832320f2790SHong Zhang   PetscFunctionReturn(0);
1833320f2790SHong Zhang }
1834320f2790SHong Zhang 
1835320f2790SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1836320f2790SHong Zhang {
1837320f2790SHong Zhang   PetscErrorCode   ierr;
1838320f2790SHong Zhang   Mat              Ae,Be,Ce;
1839320f2790SHong Zhang   Mat_MPIDense     *c;
1840320f2790SHong Zhang   Mat_MatMultDense *ab;
1841320f2790SHong Zhang 
1842320f2790SHong Zhang   PetscFunctionBegin;
1843320f2790SHong Zhang   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1844378336b6SHong Zhang     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1845320f2790SHong Zhang   }
1846320f2790SHong Zhang 
1847320f2790SHong Zhang   /* convert A and B to Elemental matrices Ae and Be */
1848320f2790SHong Zhang   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1849320f2790SHong Zhang   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1850320f2790SHong Zhang 
1851320f2790SHong Zhang   /* Ce = Ae*Be */
1852320f2790SHong Zhang   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1853320f2790SHong Zhang   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1854320f2790SHong Zhang 
1855320f2790SHong Zhang   /* convert Ce to C */
1856320f2790SHong Zhang   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1857320f2790SHong Zhang 
1858320f2790SHong Zhang   /* create data structure for reuse Cdense */
1859320f2790SHong Zhang   ierr = PetscNew(&ab);CHKERRQ(ierr);
1860320f2790SHong Zhang   c                  = (Mat_MPIDense*)(*C)->data;
1861320f2790SHong Zhang   c->abdense         = ab;
1862320f2790SHong Zhang 
1863320f2790SHong Zhang   ab->Ae             = Ae;
1864320f2790SHong Zhang   ab->Be             = Be;
1865320f2790SHong Zhang   ab->Ce             = Ce;
1866320f2790SHong Zhang   ab->destroy        = (*C)->ops->destroy;
1867320f2790SHong Zhang   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
18689775878aSHong Zhang   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1869320f2790SHong Zhang   PetscFunctionReturn(0);
1870320f2790SHong Zhang }
1871320f2790SHong Zhang 
1872150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1873320f2790SHong Zhang {
1874320f2790SHong Zhang   PetscErrorCode ierr;
1875320f2790SHong Zhang 
1876320f2790SHong Zhang   PetscFunctionBegin;
1877320f2790SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
187857299f2fSHong Zhang     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1879320f2790SHong Zhang     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
188057299f2fSHong Zhang     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1881320f2790SHong Zhang   } else {
188257299f2fSHong Zhang     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1883320f2790SHong Zhang     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
188457299f2fSHong Zhang     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1885320f2790SHong Zhang   }
1886320f2790SHong Zhang   PetscFunctionReturn(0);
1887320f2790SHong Zhang }
18885242a7b1SHong Zhang #endif
1889