xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 62b4c0b38132a63aca20a38eff6e3f7572db1089)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
3ed3cc1f0SBarry Smith /*
4ed3cc1f0SBarry Smith    Basic functions for basic parallel dense matrices.
5ed3cc1f0SBarry Smith */
6ed3cc1f0SBarry Smith 
789ac3759SBarry Smith #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
88965ea79SLois Curfman McInnes 
9ba8c8a56SBarry Smith #undef __FUNCT__
10ab92ecdeSBarry Smith #define __FUNCT__ "MatDenseGetLocalMatrix"
11ab92ecdeSBarry Smith /*@
12ab92ecdeSBarry Smith 
13ab92ecdeSBarry Smith       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14ab92ecdeSBarry Smith               matrix that represents the operator. For sequential matrices it returns itself.
15ab92ecdeSBarry Smith 
16ab92ecdeSBarry Smith     Input Parameter:
17ab92ecdeSBarry Smith .      A - the Seq or MPI dense matrix
18ab92ecdeSBarry Smith 
19ab92ecdeSBarry Smith     Output Parameter:
20ab92ecdeSBarry Smith .      B - the inner matrix
21ab92ecdeSBarry Smith 
228e6c10adSSatish Balay     Level: intermediate
238e6c10adSSatish Balay 
24ab92ecdeSBarry Smith @*/
25ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26ab92ecdeSBarry Smith {
27ab92ecdeSBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28ab92ecdeSBarry Smith   PetscErrorCode ierr;
29ab92ecdeSBarry Smith   PetscTruth     flg;
30ab92ecdeSBarry Smith 
31ab92ecdeSBarry Smith   PetscFunctionBegin;
32ab92ecdeSBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33ab92ecdeSBarry Smith   if (flg) {
34ab92ecdeSBarry Smith     *B = mat->A;
35ab92ecdeSBarry Smith   } else {
36ab92ecdeSBarry Smith     *B = A;
37ab92ecdeSBarry Smith   }
38ab92ecdeSBarry Smith   PetscFunctionReturn(0);
39ab92ecdeSBarry Smith }
40ab92ecdeSBarry Smith 
41ab92ecdeSBarry Smith #undef __FUNCT__
42ba8c8a56SBarry Smith #define __FUNCT__ "MatGetRow_MPIDense"
43ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44ba8c8a56SBarry Smith {
45ba8c8a56SBarry Smith   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46ba8c8a56SBarry Smith   PetscErrorCode ierr;
47899cda47SBarry Smith   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
48ba8c8a56SBarry Smith 
49ba8c8a56SBarry Smith   PetscFunctionBegin;
50ba8c8a56SBarry Smith   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
51ba8c8a56SBarry Smith   lrow = row - rstart;
52ba8c8a56SBarry Smith   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53ba8c8a56SBarry Smith   PetscFunctionReturn(0);
54ba8c8a56SBarry Smith }
55ba8c8a56SBarry Smith 
56ba8c8a56SBarry Smith #undef __FUNCT__
57ba8c8a56SBarry Smith #define __FUNCT__ "MatRestoreRow_MPIDense"
58ba8c8a56SBarry Smith PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59ba8c8a56SBarry Smith {
60ba8c8a56SBarry Smith   PetscErrorCode ierr;
61ba8c8a56SBarry Smith 
62ba8c8a56SBarry Smith   PetscFunctionBegin;
63ba8c8a56SBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64ba8c8a56SBarry Smith   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65ba8c8a56SBarry Smith   PetscFunctionReturn(0);
66ba8c8a56SBarry Smith }
67ba8c8a56SBarry Smith 
680de54da6SSatish Balay EXTERN_C_BEGIN
694a2ae208SSatish Balay #undef __FUNCT__
704a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
720de54da6SSatish Balay {
730de54da6SSatish Balay   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
746849ba73SBarry Smith   PetscErrorCode ierr;
75899cda47SBarry Smith   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
7687828ca2SBarry Smith   PetscScalar    *array;
770de54da6SSatish Balay   MPI_Comm       comm;
780de54da6SSatish Balay 
790de54da6SSatish Balay   PetscFunctionBegin;
80899cda47SBarry Smith   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
810de54da6SSatish Balay 
820de54da6SSatish Balay   /* The reuse aspect is not implemented efficiently */
830de54da6SSatish Balay   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
840de54da6SSatish Balay 
850de54da6SSatish Balay   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
860de54da6SSatish Balay   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
87f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,B);CHKERRQ(ierr);
88f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
897adad957SLisandro Dalcin   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
905c5985e7SKris Buschelman   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
910de54da6SSatish Balay   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
920de54da6SSatish Balay   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
930de54da6SSatish Balay   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
940de54da6SSatish Balay 
950de54da6SSatish Balay   *iscopy = PETSC_TRUE;
960de54da6SSatish Balay   PetscFunctionReturn(0);
970de54da6SSatish Balay }
980de54da6SSatish Balay EXTERN_C_END
990de54da6SSatish Balay 
1004a2ae208SSatish Balay #undef __FUNCT__
1014a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIDense"
10213f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
1038965ea79SLois Curfman McInnes {
10439b7565bSBarry Smith   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
105dfbe8321SBarry Smith   PetscErrorCode ierr;
106899cda47SBarry Smith   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
107273d9f13SBarry Smith   PetscTruth     roworiented = A->roworiented;
1088965ea79SLois Curfman McInnes 
1093a40ed3dSBarry Smith   PetscFunctionBegin;
1108965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
1115ef9f2a5SBarry Smith     if (idxm[i] < 0) continue;
112899cda47SBarry Smith     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
1138965ea79SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
1148965ea79SLois Curfman McInnes       row = idxm[i] - rstart;
11539b7565bSBarry Smith       if (roworiented) {
11639b7565bSBarry Smith         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
1173a40ed3dSBarry Smith       } else {
1188965ea79SLois Curfman McInnes         for (j=0; j<n; j++) {
1195ef9f2a5SBarry Smith           if (idxn[j] < 0) continue;
120899cda47SBarry Smith           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
12139b7565bSBarry Smith           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
12239b7565bSBarry Smith         }
1238965ea79SLois Curfman McInnes       }
1243a40ed3dSBarry Smith     } else {
1253782ba37SSatish Balay       if (!A->donotstash) {
12639b7565bSBarry Smith         if (roworiented) {
1278798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
128d36fbae8SSatish Balay         } else {
1298798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
13039b7565bSBarry Smith         }
131b49de8d1SLois Curfman McInnes       }
132b49de8d1SLois Curfman McInnes     }
1333782ba37SSatish Balay   }
1343a40ed3dSBarry Smith   PetscFunctionReturn(0);
135b49de8d1SLois Curfman McInnes }
136b49de8d1SLois Curfman McInnes 
1374a2ae208SSatish Balay #undef __FUNCT__
1384a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIDense"
13913f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
140b49de8d1SLois Curfman McInnes {
141b49de8d1SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
142dfbe8321SBarry Smith   PetscErrorCode ierr;
143899cda47SBarry Smith   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
144b49de8d1SLois Curfman McInnes 
1453a40ed3dSBarry Smith   PetscFunctionBegin;
146b49de8d1SLois Curfman McInnes   for (i=0; i<m; i++) {
14797e567efSBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
148899cda47SBarry Smith     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
149b49de8d1SLois Curfman McInnes     if (idxm[i] >= rstart && idxm[i] < rend) {
150b49de8d1SLois Curfman McInnes       row = idxm[i] - rstart;
151b49de8d1SLois Curfman McInnes       for (j=0; j<n; j++) {
15297e567efSBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
153899cda47SBarry Smith         if (idxn[j] >= mat->cmap.N) {
15429bbc08cSBarry Smith           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
155a8c6a408SBarry Smith         }
156b49de8d1SLois Curfman McInnes         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
157b49de8d1SLois Curfman McInnes       }
158a8c6a408SBarry Smith     } else {
15929bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
1608965ea79SLois Curfman McInnes     }
1618965ea79SLois Curfman McInnes   }
1623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1638965ea79SLois Curfman McInnes }
1648965ea79SLois Curfman McInnes 
1654a2ae208SSatish Balay #undef __FUNCT__
1664a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_MPIDense"
167dfbe8321SBarry Smith PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
168ff14e315SSatish Balay {
169ff14e315SSatish Balay   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170dfbe8321SBarry Smith   PetscErrorCode ierr;
171ff14e315SSatish Balay 
1723a40ed3dSBarry Smith   PetscFunctionBegin;
173ff14e315SSatish Balay   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
1743a40ed3dSBarry Smith   PetscFunctionReturn(0);
175ff14e315SSatish Balay }
176ff14e315SSatish Balay 
1774a2ae208SSatish Balay #undef __FUNCT__
1784a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrix_MPIDense"
17913f74950SBarry Smith static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
180ca3fa75bSLois Curfman McInnes {
181ca3fa75bSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
182ca3fa75bSLois Curfman McInnes   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
1836849ba73SBarry Smith   PetscErrorCode ierr;
18413f74950SBarry Smith   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
18587828ca2SBarry Smith   PetscScalar    *av,*bv,*v = lmat->v;
186ca3fa75bSLois Curfman McInnes   Mat            newmat;
187ca3fa75bSLois Curfman McInnes 
188ca3fa75bSLois Curfman McInnes   PetscFunctionBegin;
189ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
190ca3fa75bSLois Curfman McInnes   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
191b9b97703SBarry Smith   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
192b9b97703SBarry Smith   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
193ca3fa75bSLois Curfman McInnes 
194ca3fa75bSLois Curfman McInnes   /* No parallel redistribution currently supported! Should really check each index set
1957eba5e9cSLois Curfman McInnes      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
1967eba5e9cSLois Curfman McInnes      original matrix! */
197ca3fa75bSLois Curfman McInnes 
198ca3fa75bSLois Curfman McInnes   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
1997eba5e9cSLois Curfman McInnes   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
200ca3fa75bSLois Curfman McInnes 
201ca3fa75bSLois Curfman McInnes   /* Check submatrix call */
202ca3fa75bSLois Curfman McInnes   if (scall == MAT_REUSE_MATRIX) {
20329bbc08cSBarry Smith     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
2047eba5e9cSLois Curfman McInnes     /* Really need to test rows and column sizes! */
205ca3fa75bSLois Curfman McInnes     newmat = *B;
206ca3fa75bSLois Curfman McInnes   } else {
207ca3fa75bSLois Curfman McInnes     /* Create and fill new matrix */
2087adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
209f69a0ea3SMatthew Knepley     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
2107adad957SLisandro Dalcin     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
211878740d9SKris Buschelman     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
212ca3fa75bSLois Curfman McInnes   }
213ca3fa75bSLois Curfman McInnes 
214ca3fa75bSLois Curfman McInnes   /* Now extract the data pointers and do the copy, column at a time */
215ca3fa75bSLois Curfman McInnes   newmatd = (Mat_MPIDense*)newmat->data;
216ca3fa75bSLois Curfman McInnes   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
217ca3fa75bSLois Curfman McInnes 
218ca3fa75bSLois Curfman McInnes   for (i=0; i<ncols; i++) {
219ca3fa75bSLois Curfman McInnes     av = v + nlrows*icol[i];
220ca3fa75bSLois Curfman McInnes     for (j=0; j<nrows; j++) {
2217eba5e9cSLois Curfman McInnes       *bv++ = av[irow[j] - rstart];
222ca3fa75bSLois Curfman McInnes     }
223ca3fa75bSLois Curfman McInnes   }
224ca3fa75bSLois Curfman McInnes 
225ca3fa75bSLois Curfman McInnes   /* Assemble the matrices so that the correct flags are set */
226ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
227ca3fa75bSLois Curfman McInnes   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
228ca3fa75bSLois Curfman McInnes 
229ca3fa75bSLois Curfman McInnes   /* Free work space */
230ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
231ca3fa75bSLois Curfman McInnes   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
232ca3fa75bSLois Curfman McInnes   *B = newmat;
233ca3fa75bSLois Curfman McInnes   PetscFunctionReturn(0);
234ca3fa75bSLois Curfman McInnes }
235ca3fa75bSLois Curfman McInnes 
2364a2ae208SSatish Balay #undef __FUNCT__
2374a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_MPIDense"
238dfbe8321SBarry Smith PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
239ff14e315SSatish Balay {
2403a40ed3dSBarry Smith   PetscFunctionBegin;
2413a40ed3dSBarry Smith   PetscFunctionReturn(0);
242ff14e315SSatish Balay }
243ff14e315SSatish Balay 
2444a2ae208SSatish Balay #undef __FUNCT__
2454a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIDense"
246dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
2478965ea79SLois Curfman McInnes {
24839ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
2497adad957SLisandro Dalcin   MPI_Comm       comm = ((PetscObject)mat)->comm;
250dfbe8321SBarry Smith   PetscErrorCode ierr;
25113f74950SBarry Smith   PetscInt       nstash,reallocs;
2528965ea79SLois Curfman McInnes   InsertMode     addv;
2538965ea79SLois Curfman McInnes 
2543a40ed3dSBarry Smith   PetscFunctionBegin;
2558965ea79SLois Curfman McInnes   /* make sure all processors are either in INSERTMODE or ADDMODE */
256ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
2577056b6fcSBarry Smith   if (addv == (ADD_VALUES|INSERT_VALUES)) {
25829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
2598965ea79SLois Curfman McInnes   }
260e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
2618965ea79SLois Curfman McInnes 
2621e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
2638798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
264ae15b995SBarry Smith   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
2668965ea79SLois Curfman McInnes }
2678965ea79SLois Curfman McInnes 
2684a2ae208SSatish Balay #undef __FUNCT__
2694a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIDense"
270dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
2718965ea79SLois Curfman McInnes {
27239ddd567SLois Curfman McInnes   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
2736849ba73SBarry Smith   PetscErrorCode  ierr;
27413f74950SBarry Smith   PetscInt        i,*row,*col,flg,j,rstart,ncols;
27513f74950SBarry Smith   PetscMPIInt     n;
27687828ca2SBarry Smith   PetscScalar     *val;
277e0fa3b82SLois Curfman McInnes   InsertMode      addv=mat->insertmode;
2788965ea79SLois Curfman McInnes 
2793a40ed3dSBarry Smith   PetscFunctionBegin;
2808965ea79SLois Curfman McInnes   /*  wait on receives */
2817ef1d9bdSSatish Balay   while (1) {
2828798bf22SSatish Balay     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
2837ef1d9bdSSatish Balay     if (!flg) break;
2848965ea79SLois Curfman McInnes 
2857ef1d9bdSSatish Balay     for (i=0; i<n;) {
2867ef1d9bdSSatish Balay       /* Now identify the consecutive vals belonging to the same row */
2877ef1d9bdSSatish Balay       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
2887ef1d9bdSSatish Balay       if (j < n) ncols = j-i;
2897ef1d9bdSSatish Balay       else       ncols = n-i;
2907ef1d9bdSSatish Balay       /* Now assemble all these values with a single function call */
2917ef1d9bdSSatish Balay       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
2927ef1d9bdSSatish Balay       i = j;
2938965ea79SLois Curfman McInnes     }
2947ef1d9bdSSatish Balay   }
2958798bf22SSatish Balay   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
2968965ea79SLois Curfman McInnes 
29739ddd567SLois Curfman McInnes   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
29839ddd567SLois Curfman McInnes   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
2998965ea79SLois Curfman McInnes 
3006d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
30139ddd567SLois Curfman McInnes     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
3028965ea79SLois Curfman McInnes   }
3033a40ed3dSBarry Smith   PetscFunctionReturn(0);
3048965ea79SLois Curfman McInnes }
3058965ea79SLois Curfman McInnes 
3064a2ae208SSatish Balay #undef __FUNCT__
3074a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIDense"
308dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A)
3098965ea79SLois Curfman McInnes {
310dfbe8321SBarry Smith   PetscErrorCode ierr;
31139ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
3123a40ed3dSBarry Smith 
3133a40ed3dSBarry Smith   PetscFunctionBegin;
3143a40ed3dSBarry Smith   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
3153a40ed3dSBarry Smith   PetscFunctionReturn(0);
3168965ea79SLois Curfman McInnes }
3178965ea79SLois Curfman McInnes 
3188965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the
3198965ea79SLois Curfman McInnes    matrix is square and the column and row owerships are identical.
3208965ea79SLois Curfman McInnes    This is a BUG. The only way to fix it seems to be to access
3213501a2bdSLois Curfman McInnes    mdn->A and mdn->B directly and not through the MatZeroRows()
3228965ea79SLois Curfman McInnes    routine.
3238965ea79SLois Curfman McInnes */
3244a2ae208SSatish Balay #undef __FUNCT__
3254a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIDense"
326f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
3278965ea79SLois Curfman McInnes {
32839ddd567SLois Curfman McInnes   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
3296849ba73SBarry Smith   PetscErrorCode ierr;
330899cda47SBarry Smith   PetscInt       i,*owners = A->rmap.range;
33113f74950SBarry Smith   PetscInt       *nprocs,j,idx,nsends;
33213f74950SBarry Smith   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
3337adad957SLisandro Dalcin   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
33413f74950SBarry Smith   PetscInt       *lens,*lrows,*values;
33513f74950SBarry Smith   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
3367adad957SLisandro Dalcin   MPI_Comm       comm = ((PetscObject)A)->comm;
3378965ea79SLois Curfman McInnes   MPI_Request    *send_waits,*recv_waits;
3388965ea79SLois Curfman McInnes   MPI_Status     recv_status,*send_status;
33935d8aa7fSBarry Smith   PetscTruth     found;
3408965ea79SLois Curfman McInnes 
3413a40ed3dSBarry Smith   PetscFunctionBegin;
3428965ea79SLois Curfman McInnes   /*  first count number of contributors to each processor */
34313f74950SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
34413f74950SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
34513f74950SBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
3468965ea79SLois Curfman McInnes   for (i=0; i<N; i++) {
3478965ea79SLois Curfman McInnes     idx = rows[i];
34835d8aa7fSBarry Smith     found = PETSC_FALSE;
3498965ea79SLois Curfman McInnes     for (j=0; j<size; j++) {
3508965ea79SLois Curfman McInnes       if (idx >= owners[j] && idx < owners[j+1]) {
351c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
3528965ea79SLois Curfman McInnes       }
3538965ea79SLois Curfman McInnes     }
35429bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
3558965ea79SLois Curfman McInnes   }
356c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
3578965ea79SLois Curfman McInnes 
3588965ea79SLois Curfman McInnes   /* inform other processors of number of messages and max length*/
359c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
3608965ea79SLois Curfman McInnes 
3618965ea79SLois Curfman McInnes   /* post receives:   */
36213f74950SBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
363b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
3648965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
36513f74950SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3668965ea79SLois Curfman McInnes   }
3678965ea79SLois Curfman McInnes 
3688965ea79SLois Curfman McInnes   /* do sends:
3698965ea79SLois Curfman McInnes       1) starts[i] gives the starting index in svalues for stuff going to
3708965ea79SLois Curfman McInnes          the ith processor
3718965ea79SLois Curfman McInnes   */
37213f74950SBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
373b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
37413f74950SBarry Smith   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
3758965ea79SLois Curfman McInnes   starts[0]  = 0;
376c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3778965ea79SLois Curfman McInnes   for (i=0; i<N; i++) {
3788965ea79SLois Curfman McInnes     svalues[starts[owner[i]]++] = rows[i];
3798965ea79SLois Curfman McInnes   }
3808965ea79SLois Curfman McInnes 
3818965ea79SLois Curfman McInnes   starts[0] = 0;
382c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
3838965ea79SLois Curfman McInnes   count = 0;
3848965ea79SLois Curfman McInnes   for (i=0; i<size; i++) {
385c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
38613f74950SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
3878965ea79SLois Curfman McInnes     }
3888965ea79SLois Curfman McInnes   }
389606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
3908965ea79SLois Curfman McInnes 
3918965ea79SLois Curfman McInnes   base = owners[rank];
3928965ea79SLois Curfman McInnes 
3938965ea79SLois Curfman McInnes   /*  wait on receives */
39413f74950SBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
3958965ea79SLois Curfman McInnes   source = lens + nrecvs;
3968965ea79SLois Curfman McInnes   count  = nrecvs; slen = 0;
3978965ea79SLois Curfman McInnes   while (count) {
398ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
3998965ea79SLois Curfman McInnes     /* unpack receives into our local space */
40013f74950SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
4018965ea79SLois Curfman McInnes     source[imdex]  = recv_status.MPI_SOURCE;
4028965ea79SLois Curfman McInnes     lens[imdex]    = n;
4038965ea79SLois Curfman McInnes     slen += n;
4048965ea79SLois Curfman McInnes     count--;
4058965ea79SLois Curfman McInnes   }
406606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
4078965ea79SLois Curfman McInnes 
4088965ea79SLois Curfman McInnes   /* move the data into the send scatter */
40913f74950SBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
4108965ea79SLois Curfman McInnes   count = 0;
4118965ea79SLois Curfman McInnes   for (i=0; i<nrecvs; i++) {
4128965ea79SLois Curfman McInnes     values = rvalues + i*nmax;
4138965ea79SLois Curfman McInnes     for (j=0; j<lens[i]; j++) {
4148965ea79SLois Curfman McInnes       lrows[count++] = values[j] - base;
4158965ea79SLois Curfman McInnes     }
4168965ea79SLois Curfman McInnes   }
417606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
418606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
419606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
420606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
4218965ea79SLois Curfman McInnes 
4228965ea79SLois Curfman McInnes   /* actually zap the local rows */
423f4df32b1SMatthew Knepley   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
424606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
4258965ea79SLois Curfman McInnes 
4268965ea79SLois Curfman McInnes   /* wait on sends */
4278965ea79SLois Curfman McInnes   if (nsends) {
428b0a32e0cSBarry Smith     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
429ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
430606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
4318965ea79SLois Curfman McInnes   }
432606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
433606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
4348965ea79SLois Curfman McInnes 
4353a40ed3dSBarry Smith   PetscFunctionReturn(0);
4368965ea79SLois Curfman McInnes }
4378965ea79SLois Curfman McInnes 
4384a2ae208SSatish Balay #undef __FUNCT__
4394a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIDense"
440dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
4418965ea79SLois Curfman McInnes {
44239ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
443dfbe8321SBarry Smith   PetscErrorCode ierr;
444c456f294SBarry Smith 
4453a40ed3dSBarry Smith   PetscFunctionBegin;
446ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
447ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
44844cd7ae7SLois Curfman McInnes   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
4493a40ed3dSBarry Smith   PetscFunctionReturn(0);
4508965ea79SLois Curfman McInnes }
4518965ea79SLois Curfman McInnes 
4524a2ae208SSatish Balay #undef __FUNCT__
4534a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIDense"
454dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
4558965ea79SLois Curfman McInnes {
45639ddd567SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
457dfbe8321SBarry Smith   PetscErrorCode ierr;
458c456f294SBarry Smith 
4593a40ed3dSBarry Smith   PetscFunctionBegin;
460ca9f406cSSatish Balay   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
461ca9f406cSSatish Balay   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
46244cd7ae7SLois Curfman McInnes   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
4633a40ed3dSBarry Smith   PetscFunctionReturn(0);
4648965ea79SLois Curfman McInnes }
4658965ea79SLois Curfman McInnes 
4664a2ae208SSatish Balay #undef __FUNCT__
4674a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIDense"
468dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
469096963f5SLois Curfman McInnes {
470096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
471dfbe8321SBarry Smith   PetscErrorCode ierr;
47287828ca2SBarry Smith   PetscScalar    zero = 0.0;
473096963f5SLois Curfman McInnes 
4743a40ed3dSBarry Smith   PetscFunctionBegin;
4752dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
4767c922b88SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
477ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
478ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4793a40ed3dSBarry Smith   PetscFunctionReturn(0);
480096963f5SLois Curfman McInnes }
481096963f5SLois Curfman McInnes 
4824a2ae208SSatish Balay #undef __FUNCT__
4834a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
484dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
485096963f5SLois Curfman McInnes {
486096963f5SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
487dfbe8321SBarry Smith   PetscErrorCode ierr;
488096963f5SLois Curfman McInnes 
4893a40ed3dSBarry Smith   PetscFunctionBegin;
4903501a2bdSLois Curfman McInnes   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
4917c922b88SBarry Smith   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
492ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
493ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4943a40ed3dSBarry Smith   PetscFunctionReturn(0);
495096963f5SLois Curfman McInnes }
496096963f5SLois Curfman McInnes 
4974a2ae208SSatish Balay #undef __FUNCT__
4984a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIDense"
499dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
5008965ea79SLois Curfman McInnes {
50139ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502096963f5SLois Curfman McInnes   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
503dfbe8321SBarry Smith   PetscErrorCode ierr;
504899cda47SBarry Smith   PetscInt       len,i,n,m = A->rmap.n,radd;
50587828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
506ed3cc1f0SBarry Smith 
5073a40ed3dSBarry Smith   PetscFunctionBegin;
5082dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
5091ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
510096963f5SLois Curfman McInnes   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
511899cda47SBarry Smith   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
512899cda47SBarry Smith   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
513899cda47SBarry Smith   radd = A->rmap.rstart*m;
51444cd7ae7SLois Curfman McInnes   for (i=0; i<len; i++) {
515096963f5SLois Curfman McInnes     x[i] = aloc->v[radd + i*m + i];
516096963f5SLois Curfman McInnes   }
5171ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
5183a40ed3dSBarry Smith   PetscFunctionReturn(0);
5198965ea79SLois Curfman McInnes }
5208965ea79SLois Curfman McInnes 
5214a2ae208SSatish Balay #undef __FUNCT__
5224a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIDense"
523dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat)
5248965ea79SLois Curfman McInnes {
5253501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
526dfbe8321SBarry Smith   PetscErrorCode ierr;
52701b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
52801b82886SBarry Smith   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
52901b82886SBarry Smith #endif
530ed3cc1f0SBarry Smith 
5313a40ed3dSBarry Smith   PetscFunctionBegin;
53294d884c6SBarry Smith 
533aa482453SBarry Smith #if defined(PETSC_USE_LOG)
534899cda47SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
5358965ea79SLois Curfman McInnes #endif
5368798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
5373501a2bdSLois Curfman McInnes   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
53805b42c5fSBarry Smith   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
53905b42c5fSBarry Smith   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
54001b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
54101b82886SBarry Smith   if (lu->CleanUpPlapack) {
542*62b4c0b3SBarry Smith     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
543*62b4c0b3SBarry Smith     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
544*62b4c0b3SBarry Smith     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
545*62b4c0b3SBarry Smith     ierr = PLA_Finalize();CHKERRQ(ierr);
54601b82886SBarry Smith 
54701b82886SBarry Smith     ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
54801b82886SBarry Smith     ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
54901b82886SBarry Smith     ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
550622d7880SLois Curfman McInnes   }
55101b82886SBarry Smith #endif
55201b82886SBarry Smith 
553606d414cSSatish Balay   ierr = PetscFree(mdn);CHKERRQ(ierr);
554dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
555901853e0SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
556901853e0SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
5574ae313f4SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
5584ae313f4SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
5594ae313f4SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
5603a40ed3dSBarry Smith   PetscFunctionReturn(0);
5618965ea79SLois Curfman McInnes }
56239ddd567SLois Curfman McInnes 
5634a2ae208SSatish Balay #undef __FUNCT__
5644a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIDense_Binary"
5656849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
5668965ea79SLois Curfman McInnes {
56739ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
568dfbe8321SBarry Smith   PetscErrorCode    ierr;
569aa05aa95SBarry Smith   PetscViewerFormat format;
570aa05aa95SBarry Smith   int               fd;
571aa05aa95SBarry Smith   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
572aa05aa95SBarry Smith   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
573578230a0SSatish Balay   PetscScalar       *work,*v,*vv;
574aa05aa95SBarry Smith   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
575aa05aa95SBarry Smith   MPI_Status        status;
5767056b6fcSBarry Smith 
5773a40ed3dSBarry Smith   PetscFunctionBegin;
57839ddd567SLois Curfman McInnes   if (mdn->size == 1) {
57939ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
580aa05aa95SBarry Smith   } else {
581aa05aa95SBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
5827adad957SLisandro Dalcin     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
5837adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
584aa05aa95SBarry Smith 
585aa05aa95SBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
586aa05aa95SBarry Smith     if (format == PETSC_VIEWER_BINARY_NATIVE) {
587aa05aa95SBarry Smith 
588aa05aa95SBarry Smith       if (!rank) {
589aa05aa95SBarry Smith 	/* store the matrix as a dense matrix */
590aa05aa95SBarry Smith 	header[0] = MAT_FILE_COOKIE;
591aa05aa95SBarry Smith 	header[1] = mat->rmap.N;
592aa05aa95SBarry Smith 	header[2] = N;
593aa05aa95SBarry Smith 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
594aa05aa95SBarry Smith 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
595aa05aa95SBarry Smith 
596aa05aa95SBarry Smith 	/* get largest work array needed for transposing array */
597aa05aa95SBarry Smith         mmax = mat->rmap.n;
598aa05aa95SBarry Smith         for (i=1; i<size; i++) {
599aa05aa95SBarry Smith           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
6008965ea79SLois Curfman McInnes         }
601aa05aa95SBarry Smith 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
602aa05aa95SBarry Smith 
603aa05aa95SBarry Smith 	/* write out local array, by rows */
604aa05aa95SBarry Smith         m    = mat->rmap.n;
605aa05aa95SBarry Smith 	v    = a->v;
606aa05aa95SBarry Smith         for (j=0; j<N; j++) {
607aa05aa95SBarry Smith           for (i=0; i<m; i++) {
608578230a0SSatish Balay 	    work[j + i*N] = *v++;
609aa05aa95SBarry Smith 	  }
610aa05aa95SBarry Smith 	}
611aa05aa95SBarry Smith 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
612aa05aa95SBarry Smith         /* get largest work array to receive messages from other processes, excludes process zero */
613aa05aa95SBarry Smith         mmax = 0;
614aa05aa95SBarry Smith         for (i=1; i<size; i++) {
615aa05aa95SBarry Smith           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
616aa05aa95SBarry Smith         }
617578230a0SSatish Balay 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
618578230a0SSatish Balay         v = vv;
619aa05aa95SBarry Smith         for (k=1; k<size; k++) {
620aa05aa95SBarry Smith           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
6217adad957SLisandro Dalcin           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
622aa05aa95SBarry Smith 
623aa05aa95SBarry Smith           for (j=0; j<N; j++) {
624aa05aa95SBarry Smith             for (i=0; i<m; i++) {
625578230a0SSatish Balay               work[j + i*N] = *v++;
626aa05aa95SBarry Smith 	    }
627aa05aa95SBarry Smith 	  }
628aa05aa95SBarry Smith 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
629aa05aa95SBarry Smith         }
630aa05aa95SBarry Smith         ierr = PetscFree(work);CHKERRQ(ierr);
631578230a0SSatish Balay         ierr = PetscFree(vv);CHKERRQ(ierr);
632aa05aa95SBarry Smith       } else {
6337adad957SLisandro Dalcin         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
634aa05aa95SBarry Smith       }
635aa05aa95SBarry Smith     }
636aa05aa95SBarry Smith   }
6373a40ed3dSBarry Smith   PetscFunctionReturn(0);
6388965ea79SLois Curfman McInnes }
6398965ea79SLois Curfman McInnes 
6404a2ae208SSatish Balay #undef __FUNCT__
6414a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
6426849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
6438965ea79SLois Curfman McInnes {
64439ddd567SLois Curfman McInnes   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
645dfbe8321SBarry Smith   PetscErrorCode    ierr;
64632dcc486SBarry Smith   PetscMPIInt       size = mdn->size,rank = mdn->rank;
647b0a32e0cSBarry Smith   PetscViewerType   vtype;
64832077d6dSBarry Smith   PetscTruth        iascii,isdraw;
649b0a32e0cSBarry Smith   PetscViewer       sviewer;
650f3ef73ceSBarry Smith   PetscViewerFormat format;
65101b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
65201b82886SBarry Smith   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
65301b82886SBarry Smith #endif
6548965ea79SLois Curfman McInnes 
6553a40ed3dSBarry Smith   PetscFunctionBegin;
65632077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
657fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
65832077d6dSBarry Smith   if (iascii) {
659b0a32e0cSBarry Smith     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
660b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
661456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
6624e220ebcSLois Curfman McInnes       MatInfo info;
663888f2ed8SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
664899cda47SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
66577431f27SBarry Smith                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
666b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
6673501a2bdSLois Curfman McInnes       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
66801b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
66901b82886SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
67001b82886SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->nprows, lu->npcols);CHKERRQ(ierr);
67101b82886SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
67201b82886SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
67301b82886SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
67401b82886SBarry Smith #endif
6753a40ed3dSBarry Smith       PetscFunctionReturn(0);
676fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
6773a40ed3dSBarry Smith       PetscFunctionReturn(0);
6788965ea79SLois Curfman McInnes     }
679f1af5d2fSBarry Smith   } else if (isdraw) {
680b0a32e0cSBarry Smith     PetscDraw  draw;
681f1af5d2fSBarry Smith     PetscTruth isnull;
682f1af5d2fSBarry Smith 
683b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
684b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
685f1af5d2fSBarry Smith     if (isnull) PetscFunctionReturn(0);
686f1af5d2fSBarry Smith   }
68777ed5343SBarry Smith 
6888965ea79SLois Curfman McInnes   if (size == 1) {
68939ddd567SLois Curfman McInnes     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
6903a40ed3dSBarry Smith   } else {
6918965ea79SLois Curfman McInnes     /* assemble the entire matrix onto first processor. */
6928965ea79SLois Curfman McInnes     Mat         A;
693899cda47SBarry Smith     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
694ba8c8a56SBarry Smith     PetscInt    *cols;
695ba8c8a56SBarry Smith     PetscScalar *vals;
6968965ea79SLois Curfman McInnes 
6977adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
6988965ea79SLois Curfman McInnes     if (!rank) {
699f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
7003a40ed3dSBarry Smith     } else {
701f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
7028965ea79SLois Curfman McInnes     }
7037adad957SLisandro Dalcin     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
704878740d9SKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
705878740d9SKris Buschelman     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
70652e6d16bSBarry Smith     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
7078965ea79SLois Curfman McInnes 
70839ddd567SLois Curfman McInnes     /* Copy the matrix ... This isn't the most efficient means,
70939ddd567SLois Curfman McInnes        but it's quick for now */
71051022da4SBarry Smith     A->insertmode = INSERT_VALUES;
711899cda47SBarry Smith     row = mat->rmap.rstart; m = mdn->A->rmap.n;
7128965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
713ba8c8a56SBarry Smith       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
714ba8c8a56SBarry Smith       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
715ba8c8a56SBarry Smith       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
71639ddd567SLois Curfman McInnes       row++;
7178965ea79SLois Curfman McInnes     }
7188965ea79SLois Curfman McInnes 
7196d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7206d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
721b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
722b9b97703SBarry Smith     if (!rank) {
7236831982aSBarry Smith       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
7248965ea79SLois Curfman McInnes     }
725b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
726b0a32e0cSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
7278965ea79SLois Curfman McInnes     ierr = MatDestroy(A);CHKERRQ(ierr);
7288965ea79SLois Curfman McInnes   }
7293a40ed3dSBarry Smith   PetscFunctionReturn(0);
7308965ea79SLois Curfman McInnes }
7318965ea79SLois Curfman McInnes 
7324a2ae208SSatish Balay #undef __FUNCT__
7334a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIDense"
734dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
7358965ea79SLois Curfman McInnes {
736dfbe8321SBarry Smith   PetscErrorCode ierr;
73732077d6dSBarry Smith   PetscTruth     iascii,isbinary,isdraw,issocket;
7388965ea79SLois Curfman McInnes 
739433994e6SBarry Smith   PetscFunctionBegin;
7400f5bd95cSBarry Smith 
74132077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
742fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
743b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
744fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
7450f5bd95cSBarry Smith 
74632077d6dSBarry Smith   if (iascii || issocket || isdraw) {
747f1af5d2fSBarry Smith     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
7480f5bd95cSBarry Smith   } else if (isbinary) {
7493a40ed3dSBarry Smith     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
7505cd90555SBarry Smith   } else {
751958c9bccSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
7528965ea79SLois Curfman McInnes   }
7533a40ed3dSBarry Smith   PetscFunctionReturn(0);
7548965ea79SLois Curfman McInnes }
7558965ea79SLois Curfman McInnes 
7564a2ae208SSatish Balay #undef __FUNCT__
7574a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIDense"
758dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
7598965ea79SLois Curfman McInnes {
7603501a2bdSLois Curfman McInnes   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
7613501a2bdSLois Curfman McInnes   Mat            mdn = mat->A;
762dfbe8321SBarry Smith   PetscErrorCode ierr;
763329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
7648965ea79SLois Curfman McInnes 
7653a40ed3dSBarry Smith   PetscFunctionBegin;
766899cda47SBarry Smith   info->rows_global    = (double)A->rmap.N;
767899cda47SBarry Smith   info->columns_global = (double)A->cmap.N;
768899cda47SBarry Smith   info->rows_local     = (double)A->rmap.n;
769899cda47SBarry Smith   info->columns_local  = (double)A->cmap.N;
7704e220ebcSLois Curfman McInnes   info->block_size     = 1.0;
7714e220ebcSLois Curfman McInnes   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
7724e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
7734e220ebcSLois Curfman McInnes   isend[3] = info->memory;  isend[4] = info->mallocs;
7748965ea79SLois Curfman McInnes   if (flag == MAT_LOCAL) {
7754e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
7764e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
7774e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
7784e220ebcSLois Curfman McInnes     info->memory       = isend[3];
7794e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
7808965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_MAX) {
7817adad957SLisandro Dalcin     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
7824e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7834e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7844e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7854e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7864e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7878965ea79SLois Curfman McInnes   } else if (flag == MAT_GLOBAL_SUM) {
7887adad957SLisandro Dalcin     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
7894e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
7904e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
7914e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
7924e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
7934e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
7948965ea79SLois Curfman McInnes   }
7954e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
7964e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
7974e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
7983a40ed3dSBarry Smith   PetscFunctionReturn(0);
7998965ea79SLois Curfman McInnes }
8008965ea79SLois Curfman McInnes 
8014a2ae208SSatish Balay #undef __FUNCT__
8024a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIDense"
8034e0d8c25SBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
8048965ea79SLois Curfman McInnes {
80539ddd567SLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
806dfbe8321SBarry Smith   PetscErrorCode ierr;
8078965ea79SLois Curfman McInnes 
8083a40ed3dSBarry Smith   PetscFunctionBegin;
80912c028f9SKris Buschelman   switch (op) {
810512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
81112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
81212c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
8134e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
81412c028f9SKris Buschelman     break;
81512c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
8164e0d8c25SBarry Smith     a->roworiented = flg;
8174e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
81812c028f9SKris Buschelman     break;
8194e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
82012c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
821290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
82212c028f9SKris Buschelman     break;
82312c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
8244e0d8c25SBarry Smith     a->donotstash = flg;
82512c028f9SKris Buschelman     break;
82677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
82777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
8289a4540c5SBarry Smith   case MAT_HERMITIAN:
8299a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
830600fe468SBarry Smith   case MAT_IGNORE_LOWER_TRIANGULAR:
831290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
83277e54ba9SKris Buschelman     break;
83312c028f9SKris Buschelman   default:
834600fe468SBarry Smith     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
8353a40ed3dSBarry Smith   }
8363a40ed3dSBarry Smith   PetscFunctionReturn(0);
8378965ea79SLois Curfman McInnes }
8388965ea79SLois Curfman McInnes 
8398965ea79SLois Curfman McInnes 
8404a2ae208SSatish Balay #undef __FUNCT__
8414a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIDense"
842dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
8435b2fa520SLois Curfman McInnes {
8445b2fa520SLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
8455b2fa520SLois Curfman McInnes   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
84687828ca2SBarry Smith   PetscScalar    *l,*r,x,*v;
847dfbe8321SBarry Smith   PetscErrorCode ierr;
848899cda47SBarry Smith   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
8495b2fa520SLois Curfman McInnes 
8505b2fa520SLois Curfman McInnes   PetscFunctionBegin;
85172d926a5SLois Curfman McInnes   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
8525b2fa520SLois Curfman McInnes   if (ll) {
85372d926a5SLois Curfman McInnes     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
854175be7b4SMatthew Knepley     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
8551ebc52fbSHong Zhang     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
8565b2fa520SLois Curfman McInnes     for (i=0; i<m; i++) {
8575b2fa520SLois Curfman McInnes       x = l[i];
8585b2fa520SLois Curfman McInnes       v = mat->v + i;
8595b2fa520SLois Curfman McInnes       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
8605b2fa520SLois Curfman McInnes     }
8611ebc52fbSHong Zhang     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
862efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8635b2fa520SLois Curfman McInnes   }
8645b2fa520SLois Curfman McInnes   if (rr) {
865175be7b4SMatthew Knepley     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
866175be7b4SMatthew Knepley     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
867ca9f406cSSatish Balay     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868ca9f406cSSatish Balay     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8691ebc52fbSHong Zhang     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
8705b2fa520SLois Curfman McInnes     for (i=0; i<n; i++) {
8715b2fa520SLois Curfman McInnes       x = r[i];
8725b2fa520SLois Curfman McInnes       v = mat->v + i*m;
8735b2fa520SLois Curfman McInnes       for (j=0; j<m; j++) { (*v++) *= x;}
8745b2fa520SLois Curfman McInnes     }
8751ebc52fbSHong Zhang     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
876efee365bSSatish Balay     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
8775b2fa520SLois Curfman McInnes   }
8785b2fa520SLois Curfman McInnes   PetscFunctionReturn(0);
8795b2fa520SLois Curfman McInnes }
8805b2fa520SLois Curfman McInnes 
8814a2ae208SSatish Balay #undef __FUNCT__
8824a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIDense"
883dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
884096963f5SLois Curfman McInnes {
8853501a2bdSLois Curfman McInnes   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
8863501a2bdSLois Curfman McInnes   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
887dfbe8321SBarry Smith   PetscErrorCode ierr;
88813f74950SBarry Smith   PetscInt       i,j;
889329f5518SBarry Smith   PetscReal      sum = 0.0;
89087828ca2SBarry Smith   PetscScalar    *v = mat->v;
8913501a2bdSLois Curfman McInnes 
8923a40ed3dSBarry Smith   PetscFunctionBegin;
8933501a2bdSLois Curfman McInnes   if (mdn->size == 1) {
894064f8208SBarry Smith     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
8953501a2bdSLois Curfman McInnes   } else {
8963501a2bdSLois Curfman McInnes     if (type == NORM_FROBENIUS) {
897899cda47SBarry Smith       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
898aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
899329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
9003501a2bdSLois Curfman McInnes #else
9013501a2bdSLois Curfman McInnes         sum += (*v)*(*v); v++;
9023501a2bdSLois Curfman McInnes #endif
9033501a2bdSLois Curfman McInnes       }
9047adad957SLisandro Dalcin       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
905064f8208SBarry Smith       *nrm = sqrt(*nrm);
906899cda47SBarry Smith       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
9073a40ed3dSBarry Smith     } else if (type == NORM_1) {
908329f5518SBarry Smith       PetscReal *tmp,*tmp2;
909899cda47SBarry Smith       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
910899cda47SBarry Smith       tmp2 = tmp + A->cmap.N;
911899cda47SBarry Smith       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
912064f8208SBarry Smith       *nrm = 0.0;
9133501a2bdSLois Curfman McInnes       v = mat->v;
914899cda47SBarry Smith       for (j=0; j<mdn->A->cmap.n; j++) {
915899cda47SBarry Smith         for (i=0; i<mdn->A->rmap.n; i++) {
91667e560aaSBarry Smith           tmp[j] += PetscAbsScalar(*v);  v++;
9173501a2bdSLois Curfman McInnes         }
9183501a2bdSLois Curfman McInnes       }
9197adad957SLisandro Dalcin       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
920899cda47SBarry Smith       for (j=0; j<A->cmap.N; j++) {
921064f8208SBarry Smith         if (tmp2[j] > *nrm) *nrm = tmp2[j];
9223501a2bdSLois Curfman McInnes       }
923606d414cSSatish Balay       ierr = PetscFree(tmp);CHKERRQ(ierr);
924899cda47SBarry Smith       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
9253a40ed3dSBarry Smith     } else if (type == NORM_INFINITY) { /* max row norm */
926329f5518SBarry Smith       PetscReal ntemp;
9273501a2bdSLois Curfman McInnes       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
9287adad957SLisandro Dalcin       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
9293a40ed3dSBarry Smith     } else {
93029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
9313501a2bdSLois Curfman McInnes     }
9323501a2bdSLois Curfman McInnes   }
9333a40ed3dSBarry Smith   PetscFunctionReturn(0);
9343501a2bdSLois Curfman McInnes }
9353501a2bdSLois Curfman McInnes 
9364a2ae208SSatish Balay #undef __FUNCT__
9374a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIDense"
938fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
9393501a2bdSLois Curfman McInnes {
9403501a2bdSLois Curfman McInnes   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
9413501a2bdSLois Curfman McInnes   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
9423501a2bdSLois Curfman McInnes   Mat            B;
943899cda47SBarry Smith   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
9446849ba73SBarry Smith   PetscErrorCode ierr;
94513f74950SBarry Smith   PetscInt       j,i;
94687828ca2SBarry Smith   PetscScalar    *v;
9473501a2bdSLois Curfman McInnes 
9483a40ed3dSBarry Smith   PetscFunctionBegin;
949fc4dec0aSBarry Smith   if (A == *matout && M != N) {
95029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
9517056b6fcSBarry Smith   }
952fc4dec0aSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
9537adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
954f69a0ea3SMatthew Knepley     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
9557adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
956878740d9SKris Buschelman     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
957fc4dec0aSBarry Smith   } else {
958fc4dec0aSBarry Smith     B = *matout;
959fc4dec0aSBarry Smith   }
9603501a2bdSLois Curfman McInnes 
961899cda47SBarry Smith   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
9621acff37aSSatish Balay   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
9633501a2bdSLois Curfman McInnes   for (i=0; i<m; i++) rwork[i] = rstart + i;
9641acff37aSSatish Balay   for (j=0; j<n; j++) {
9653501a2bdSLois Curfman McInnes     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
9663501a2bdSLois Curfman McInnes     v   += m;
9673501a2bdSLois Curfman McInnes   }
968606d414cSSatish Balay   ierr = PetscFree(rwork);CHKERRQ(ierr);
9696d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9706d4a8577SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971fc4dec0aSBarry Smith   if (*matout != A) {
9723501a2bdSLois Curfman McInnes     *matout = B;
9733501a2bdSLois Curfman McInnes   } else {
974273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
9753501a2bdSLois Curfman McInnes   }
9763a40ed3dSBarry Smith   PetscFunctionReturn(0);
977096963f5SLois Curfman McInnes }
978096963f5SLois Curfman McInnes 
979d9eff348SSatish Balay #include "petscblaslapack.h"
9804a2ae208SSatish Balay #undef __FUNCT__
9814a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIDense"
982f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
98344cd7ae7SLois Curfman McInnes {
98444cd7ae7SLois Curfman McInnes   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
98544cd7ae7SLois Curfman McInnes   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
986f4df32b1SMatthew Knepley   PetscScalar    oalpha = alpha;
987efee365bSSatish Balay   PetscErrorCode ierr;
9880805154bSBarry Smith   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
98944cd7ae7SLois Curfman McInnes 
9903a40ed3dSBarry Smith   PetscFunctionBegin;
991f4df32b1SMatthew Knepley   BLASscal_(&nz,&oalpha,a->v,&one);
992efee365bSSatish Balay   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
9933a40ed3dSBarry Smith   PetscFunctionReturn(0);
99444cd7ae7SLois Curfman McInnes }
99544cd7ae7SLois Curfman McInnes 
9966849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
9978965ea79SLois Curfman McInnes 
9984a2ae208SSatish Balay #undef __FUNCT__
9994a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1000dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1001273d9f13SBarry Smith {
1002dfbe8321SBarry Smith   PetscErrorCode ierr;
1003273d9f13SBarry Smith 
1004273d9f13SBarry Smith   PetscFunctionBegin;
1005273d9f13SBarry Smith   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1006273d9f13SBarry Smith   PetscFunctionReturn(0);
1007273d9f13SBarry Smith }
1008273d9f13SBarry Smith 
10094ae313f4SHong Zhang #undef __FUNCT__
10104ae313f4SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
10114ae313f4SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
10124ae313f4SHong Zhang {
10134ae313f4SHong Zhang   PetscErrorCode ierr;
10144ae313f4SHong Zhang   PetscInt       m=A->rmap.n,n=B->cmap.n;
10154ae313f4SHong Zhang   Mat            Cmat;
10164ae313f4SHong Zhang 
10174ae313f4SHong Zhang   PetscFunctionBegin;
10184ae313f4SHong Zhang   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
10197adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
10204ae313f4SHong Zhang   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
10214ae313f4SHong Zhang   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
10224ae313f4SHong Zhang   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
10232a6744ebSBarry Smith   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10240a71e23eSMatthew Knepley   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10254ae313f4SHong Zhang   *C = Cmat;
10264ae313f4SHong Zhang   PetscFunctionReturn(0);
10274ae313f4SHong Zhang }
10284ae313f4SHong Zhang 
102901b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
103001b82886SBarry Smith #undef __FUNCT__
103101b82886SBarry Smith #define __FUNCT__ "MatSolve_MPIDense"
103201b82886SBarry Smith PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
103301b82886SBarry Smith {
103401b82886SBarry Smith   MPI_Comm       comm = ((PetscObject)A)->comm;
103501b82886SBarry Smith   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
103601b82886SBarry Smith   PetscErrorCode ierr;
103701b82886SBarry Smith   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
103801b82886SBarry Smith   PetscScalar    *array;
103901b82886SBarry Smith   PetscReal      one = 1.0;
104001b82886SBarry Smith   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
104101b82886SBarry Smith   PLA_Obj        v_pla = NULL;
104201b82886SBarry Smith   PetscScalar    *loc_buf;
104301b82886SBarry Smith   Vec            loc_x;
104401b82886SBarry Smith 
104501b82886SBarry Smith   PetscFunctionBegin;
104601b82886SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
104701b82886SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
104801b82886SBarry Smith 
104901b82886SBarry Smith   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1050*62b4c0b3SBarry Smith   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1051*62b4c0b3SBarry Smith   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
105201b82886SBarry Smith 
105301b82886SBarry Smith   /* Copy b into rhs_pla */
1054*62b4c0b3SBarry Smith   ierr = PLA_API_begin();CHKERRQ(ierr);
1055*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
105601b82886SBarry Smith   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1057*62b4c0b3SBarry Smith   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
105801b82886SBarry Smith   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1059*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1060*62b4c0b3SBarry Smith   ierr = PLA_API_end();CHKERRQ(ierr);
106101b82886SBarry Smith 
106201b82886SBarry Smith   if (A->factor == FACTOR_LU){
106301b82886SBarry Smith     /* Apply the permutations to the right hand sides */
1064*62b4c0b3SBarry Smith     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
106501b82886SBarry Smith 
106601b82886SBarry Smith     /* Solve L y = b, overwriting b with y */
1067*62b4c0b3SBarry Smith     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
106801b82886SBarry Smith 
106901b82886SBarry Smith     /* Solve U x = y (=b), overwriting b with x */
1070*62b4c0b3SBarry Smith     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
107101b82886SBarry Smith   } else { /* FACTOR_CHOLESKY */
1072*62b4c0b3SBarry Smith     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1073*62b4c0b3SBarry Smith     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
107401b82886SBarry Smith   }
107501b82886SBarry Smith 
107601b82886SBarry Smith   /* Copy PLAPACK x into Petsc vector x  */
1077*62b4c0b3SBarry Smith   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1078*62b4c0b3SBarry Smith   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1079*62b4c0b3SBarry Smith   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
108001b82886SBarry Smith   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
108101b82886SBarry Smith   if (!lu->pla_solved){
108201b82886SBarry Smith 
1083*62b4c0b3SBarry Smith     ierr = PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);CHKERRQ(ierr);
1084*62b4c0b3SBarry Smith     ierr = PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);CHKERRQ(ierr);
108501b82886SBarry Smith     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
108601b82886SBarry Smith 
108701b82886SBarry Smith     /* Create IS and cts for VecScatterring */
1088*62b4c0b3SBarry Smith     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1089*62b4c0b3SBarry Smith     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
109001b82886SBarry Smith     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
109101b82886SBarry Smith     idx_petsc = idx_pla + loc_m;
109201b82886SBarry Smith 
109301b82886SBarry Smith     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
109401b82886SBarry Smith     for (i=0; i<loc_m; i+=lu->nb){
109501b82886SBarry Smith       j = 0;
109601b82886SBarry Smith       while (j < lu->nb && i+j < loc_m){
109701b82886SBarry Smith         idx_petsc[i+j] = rstart + j; j++;
109801b82886SBarry Smith       }
109901b82886SBarry Smith       rstart += size*lu->nb;
110001b82886SBarry Smith     }
110101b82886SBarry Smith 
110201b82886SBarry Smith     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
110301b82886SBarry Smith 
110401b82886SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
110501b82886SBarry Smith     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
110601b82886SBarry Smith     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
110701b82886SBarry Smith     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
110801b82886SBarry Smith   }
110901b82886SBarry Smith   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
111001b82886SBarry Smith   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
111101b82886SBarry Smith 
111201b82886SBarry Smith   /* Free data */
111301b82886SBarry Smith   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1114*62b4c0b3SBarry Smith   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
111501b82886SBarry Smith 
111601b82886SBarry Smith   lu->pla_solved = PETSC_TRUE;
111701b82886SBarry Smith   PetscFunctionReturn(0);
111801b82886SBarry Smith }
111901b82886SBarry Smith 
112001b82886SBarry Smith #undef __FUNCT__
112101b82886SBarry Smith #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
112201b82886SBarry Smith PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
112301b82886SBarry Smith {
112401b82886SBarry Smith   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
112501b82886SBarry Smith   PetscErrorCode ierr;
112601b82886SBarry Smith   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
112701b82886SBarry Smith   PetscInt       info_pla=0;
112801b82886SBarry Smith   PetscScalar    *array,one = 1.0;
112901b82886SBarry Smith 
113001b82886SBarry Smith   PetscFunctionBegin;
1131*62b4c0b3SBarry Smith   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
113201b82886SBarry Smith 
113301b82886SBarry Smith   /* Copy A into lu->A */
1134*62b4c0b3SBarry Smith   ierr = PLA_API_begin();CHKERRQ(ierr);
1135*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
113601b82886SBarry Smith   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
113701b82886SBarry Smith   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1138*62b4c0b3SBarry Smith   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
113901b82886SBarry Smith   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1140*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1141*62b4c0b3SBarry Smith   ierr = PLA_API_end();CHKERRQ(ierr);
114201b82886SBarry Smith 
114301b82886SBarry Smith   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
114401b82886SBarry Smith   info_pla = PLA_LU(lu->A,lu->pivots);
114501b82886SBarry Smith   if (info_pla != 0)
114601b82886SBarry Smith     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
114701b82886SBarry Smith 
114801b82886SBarry Smith   lu->CleanUpPlapack = PETSC_TRUE;
114901b82886SBarry Smith   lu->rstart         = rstart;
115001b82886SBarry Smith 
115101b82886SBarry Smith   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
115201b82886SBarry Smith   PetscFunctionReturn(0);
115301b82886SBarry Smith }
115401b82886SBarry Smith 
115501b82886SBarry Smith #undef __FUNCT__
115601b82886SBarry Smith #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
115701b82886SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
115801b82886SBarry Smith {
115901b82886SBarry Smith   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
116001b82886SBarry Smith   PetscErrorCode ierr;
116101b82886SBarry Smith   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
116201b82886SBarry Smith   PetscInt       info_pla=0;
116301b82886SBarry Smith   PetscScalar    *array,one = 1.0;
116401b82886SBarry Smith 
116501b82886SBarry Smith   PetscFunctionBegin;
1166e0fbc2eeSBarry Smith 
116701b82886SBarry Smith   /* Copy A into lu->A */
1168*62b4c0b3SBarry Smith   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1169*62b4c0b3SBarry Smith   ierr = PLA_API_begin();CHKERRQ(ierr);
1170*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
117101b82886SBarry Smith   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
117201b82886SBarry Smith   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1173*62b4c0b3SBarry Smith   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
117401b82886SBarry Smith   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1175*62b4c0b3SBarry Smith   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1176*62b4c0b3SBarry Smith   ierr = PLA_API_end();CHKERRQ(ierr);
117701b82886SBarry Smith 
117801b82886SBarry Smith   /* Factor P A -> Chol */
117901b82886SBarry Smith   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
118001b82886SBarry Smith   if (info_pla != 0)
118101b82886SBarry Smith     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
118201b82886SBarry Smith 
118301b82886SBarry Smith   lu->CleanUpPlapack = PETSC_TRUE;
118401b82886SBarry Smith   lu->rstart         = rstart;
118501b82886SBarry Smith 
118601b82886SBarry Smith   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
118701b82886SBarry Smith   PetscFunctionReturn(0);
118801b82886SBarry Smith }
118901b82886SBarry Smith 
119001b82886SBarry Smith #undef __FUNCT__
119101b82886SBarry Smith #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
119201b82886SBarry Smith PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
119301b82886SBarry Smith {
119401b82886SBarry Smith   Mat            B;
119501b82886SBarry Smith   Mat_Plapack    *lu;
119601b82886SBarry Smith   PetscErrorCode ierr;
119701b82886SBarry Smith   PetscInt       M=A->rmap.N,N=A->cmap.N;
119801b82886SBarry Smith   MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
119901b82886SBarry Smith   PetscMPIInt    size;
120001b82886SBarry Smith   PetscInt       ierror;
120101b82886SBarry Smith 
120201b82886SBarry Smith   PetscFunctionBegin;
120301b82886SBarry Smith   /* Create the factorization matrix */
120401b82886SBarry Smith   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
120501b82886SBarry Smith   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
120601b82886SBarry Smith   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
120701b82886SBarry Smith 
120801b82886SBarry Smith   lu = (Mat_Plapack*)(B->spptr);
120901b82886SBarry Smith 
121001b82886SBarry Smith   /* Set default Plapack parameters */
121101b82886SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
121201b82886SBarry Smith   lu->nprows = 1; lu->npcols = size;
121301b82886SBarry Smith   ierror = 0;
121401b82886SBarry Smith   lu->nb     = M/size;
121501b82886SBarry Smith   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
121601b82886SBarry Smith 
121701b82886SBarry Smith   /* Set runtime options */
121801b82886SBarry Smith   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
121901b82886SBarry Smith   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
122001b82886SBarry Smith   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);
122101b82886SBarry Smith 
122201b82886SBarry Smith   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
122301b82886SBarry Smith   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
122401b82886SBarry Smith   if (ierror){
1225*62b4c0b3SBarry Smith     ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
122601b82886SBarry Smith   } else {
1227*62b4c0b3SBarry Smith     ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
122801b82886SBarry Smith   }
122901b82886SBarry Smith   lu->ierror = ierror;
123001b82886SBarry Smith 
123101b82886SBarry Smith   lu->nb_alg = 0;
123201b82886SBarry Smith   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
123301b82886SBarry Smith   if (lu->nb_alg){
1234*62b4c0b3SBarry Smith     ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
123501b82886SBarry Smith   }
123601b82886SBarry Smith   PetscOptionsEnd();
123701b82886SBarry Smith 
123801b82886SBarry Smith 
123901b82886SBarry Smith   /* Create a 2D communicator */
1240*62b4c0b3SBarry Smith   ierr = PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);CHKERRQ(ierr);
124101b82886SBarry Smith   lu->comm_2d = comm_2d;
124201b82886SBarry Smith 
124301b82886SBarry Smith   /* Initialize PLAPACK */
1244*62b4c0b3SBarry Smith   ierr = PLA_Init(comm_2d);CHKERRQ(ierr);
124501b82886SBarry Smith 
124601b82886SBarry Smith   /* Create object distribution template */
124701b82886SBarry Smith   lu->templ = NULL;
1248*62b4c0b3SBarry Smith   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
124901b82886SBarry Smith 
125001b82886SBarry Smith   /* Use suggested nb_alg if it is not provided by user */
125101b82886SBarry Smith   if (lu->nb_alg == 0){
1252*62b4c0b3SBarry Smith     ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr);
1253*62b4c0b3SBarry Smith     ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
125401b82886SBarry Smith   }
125501b82886SBarry Smith 
125601b82886SBarry Smith   /* Set the datatype */
125701b82886SBarry Smith #if defined(PETSC_USE_COMPLEX)
125801b82886SBarry Smith   lu->datatype = MPI_DOUBLE_COMPLEX;
125901b82886SBarry Smith #else
126001b82886SBarry Smith   lu->datatype = MPI_DOUBLE;
126101b82886SBarry Smith #endif
126201b82886SBarry Smith 
1263e1156936SBarry Smith   ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1264e1156936SBarry Smith 
126501b82886SBarry Smith   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
126601b82886SBarry Smith   lu->CleanUpPlapack = PETSC_TRUE;
126701b82886SBarry Smith   *F                 = B;
126801b82886SBarry Smith   PetscFunctionReturn(0);
126901b82886SBarry Smith }
127001b82886SBarry Smith 
127101b82886SBarry Smith /* Note the Petsc r and c permutations are ignored */
127201b82886SBarry Smith #undef __FUNCT__
127301b82886SBarry Smith #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
127401b82886SBarry Smith PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
127501b82886SBarry Smith {
127601b82886SBarry Smith   PetscErrorCode ierr;
1277e1156936SBarry Smith   PetscInt       M = A->rmap.N;
1278e1156936SBarry Smith   Mat_Plapack    *lu;
127901b82886SBarry Smith 
128001b82886SBarry Smith   PetscFunctionBegin;
128101b82886SBarry Smith   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1282e1156936SBarry Smith   lu = (Mat_Plapack*)(*F)->spptr;
1283e1156936SBarry Smith   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
128401b82886SBarry Smith   (*F)->factor = FACTOR_LU;
128501b82886SBarry Smith   PetscFunctionReturn(0);
128601b82886SBarry Smith }
128701b82886SBarry Smith 
128801b82886SBarry Smith /* Note the Petsc perm permutation is ignored */
128901b82886SBarry Smith #undef __FUNCT__
129001b82886SBarry Smith #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
129101b82886SBarry Smith PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
129201b82886SBarry Smith {
129301b82886SBarry Smith   PetscErrorCode ierr;
129401b82886SBarry Smith   PetscTruth     issymmetric,set;
129501b82886SBarry Smith 
129601b82886SBarry Smith   PetscFunctionBegin;
129701b82886SBarry Smith   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
129801b82886SBarry Smith   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
129901b82886SBarry Smith   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
130001b82886SBarry Smith   (*F)->factor = FACTOR_CHOLESKY;
130101b82886SBarry Smith   PetscFunctionReturn(0);
130201b82886SBarry Smith }
130301b82886SBarry Smith #endif
130401b82886SBarry Smith 
13058965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/
130609dc0095SBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
130709dc0095SBarry Smith        MatGetRow_MPIDense,
130809dc0095SBarry Smith        MatRestoreRow_MPIDense,
130909dc0095SBarry Smith        MatMult_MPIDense,
131097304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense,
13117c922b88SBarry Smith        MatMultTranspose_MPIDense,
13127c922b88SBarry Smith        MatMultTransposeAdd_MPIDense,
131301b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
131401b82886SBarry Smith        MatSolve_MPIDense,
131501b82886SBarry Smith #else
13168965ea79SLois Curfman McInnes        0,
131701b82886SBarry Smith #endif
131809dc0095SBarry Smith        0,
131909dc0095SBarry Smith        0,
132097304618SKris Buschelman /*10*/ 0,
132109dc0095SBarry Smith        0,
132209dc0095SBarry Smith        0,
132309dc0095SBarry Smith        0,
132409dc0095SBarry Smith        MatTranspose_MPIDense,
132597304618SKris Buschelman /*15*/ MatGetInfo_MPIDense,
13266e4ee0c6SHong Zhang        MatEqual_MPIDense,
132709dc0095SBarry Smith        MatGetDiagonal_MPIDense,
13285b2fa520SLois Curfman McInnes        MatDiagonalScale_MPIDense,
132909dc0095SBarry Smith        MatNorm_MPIDense,
133097304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIDense,
133109dc0095SBarry Smith        MatAssemblyEnd_MPIDense,
133209dc0095SBarry Smith        0,
133309dc0095SBarry Smith        MatSetOption_MPIDense,
133409dc0095SBarry Smith        MatZeroEntries_MPIDense,
133597304618SKris Buschelman /*25*/ MatZeroRows_MPIDense,
133601b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
13370ad19415SHong Zhang        MatLUFactorSymbolic_MPIDense,
133801b82886SBarry Smith        MatLUFactorNumeric_MPIDense,
13390ad19415SHong Zhang        MatCholeskyFactorSymbolic_MPIDense,
134001b82886SBarry Smith        MatCholeskyFactorNumeric_MPIDense,
134101b82886SBarry Smith #else
1342919b68f7SBarry Smith        0,
134301b82886SBarry Smith        0,
134401b82886SBarry Smith        0,
134501b82886SBarry Smith        0,
134601b82886SBarry Smith #endif
134797304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIDense,
1348273d9f13SBarry Smith        0,
134909dc0095SBarry Smith        0,
135009dc0095SBarry Smith        MatGetArray_MPIDense,
135109dc0095SBarry Smith        MatRestoreArray_MPIDense,
135297304618SKris Buschelman /*35*/ MatDuplicate_MPIDense,
135309dc0095SBarry Smith        0,
135409dc0095SBarry Smith        0,
135509dc0095SBarry Smith        0,
135609dc0095SBarry Smith        0,
135797304618SKris Buschelman /*40*/ 0,
13582ce60cd0SSatish Balay        MatGetSubMatrices_MPIDense,
135909dc0095SBarry Smith        0,
136009dc0095SBarry Smith        MatGetValues_MPIDense,
136109dc0095SBarry Smith        0,
136297304618SKris Buschelman /*45*/ 0,
136309dc0095SBarry Smith        MatScale_MPIDense,
136409dc0095SBarry Smith        0,
136509dc0095SBarry Smith        0,
136609dc0095SBarry Smith        0,
1367521d7252SBarry Smith /*50*/ 0,
136809dc0095SBarry Smith        0,
136909dc0095SBarry Smith        0,
137009dc0095SBarry Smith        0,
137109dc0095SBarry Smith        0,
137297304618SKris Buschelman /*55*/ 0,
137309dc0095SBarry Smith        0,
137409dc0095SBarry Smith        0,
137509dc0095SBarry Smith        0,
137609dc0095SBarry Smith        0,
137797304618SKris Buschelman /*60*/ MatGetSubMatrix_MPIDense,
1378b9b97703SBarry Smith        MatDestroy_MPIDense,
1379b9b97703SBarry Smith        MatView_MPIDense,
1380357abbc8SBarry Smith        0,
138197304618SKris Buschelman        0,
138297304618SKris Buschelman /*65*/ 0,
138397304618SKris Buschelman        0,
138497304618SKris Buschelman        0,
138597304618SKris Buschelman        0,
138697304618SKris Buschelman        0,
138797304618SKris Buschelman /*70*/ 0,
138897304618SKris Buschelman        0,
138997304618SKris Buschelman        0,
139097304618SKris Buschelman        0,
139197304618SKris Buschelman        0,
139297304618SKris Buschelman /*75*/ 0,
139397304618SKris Buschelman        0,
139497304618SKris Buschelman        0,
139597304618SKris Buschelman        0,
139697304618SKris Buschelman        0,
139797304618SKris Buschelman /*80*/ 0,
139897304618SKris Buschelman        0,
139997304618SKris Buschelman        0,
140097304618SKris Buschelman        0,
1401865e5f61SKris Buschelman /*84*/ MatLoad_MPIDense,
1402865e5f61SKris Buschelman        0,
1403865e5f61SKris Buschelman        0,
1404865e5f61SKris Buschelman        0,
1405865e5f61SKris Buschelman        0,
1406865e5f61SKris Buschelman        0,
1407865e5f61SKris Buschelman /*90*/ 0,
14084ae313f4SHong Zhang        MatMatMultSymbolic_MPIDense_MPIDense,
1409865e5f61SKris Buschelman        0,
1410865e5f61SKris Buschelman        0,
1411865e5f61SKris Buschelman        0,
1412865e5f61SKris Buschelman /*95*/ 0,
1413865e5f61SKris Buschelman        0,
1414865e5f61SKris Buschelman        0,
1415865e5f61SKris Buschelman        0};
14168965ea79SLois Curfman McInnes 
1417273d9f13SBarry Smith EXTERN_C_BEGIN
14184a2ae208SSatish Balay #undef __FUNCT__
1419a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1420be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1421a23d5eceSKris Buschelman {
1422a23d5eceSKris Buschelman   Mat_MPIDense   *a;
1423dfbe8321SBarry Smith   PetscErrorCode ierr;
1424a23d5eceSKris Buschelman 
1425a23d5eceSKris Buschelman   PetscFunctionBegin;
1426a23d5eceSKris Buschelman   mat->preallocated = PETSC_TRUE;
1427a23d5eceSKris Buschelman   /* Note:  For now, when data is specified above, this assumes the user correctly
1428a23d5eceSKris Buschelman    allocates the local dense storage space.  We should add error checking. */
1429a23d5eceSKris Buschelman 
1430a23d5eceSKris Buschelman   a    = (Mat_MPIDense*)mat->data;
1431f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1432899cda47SBarry Smith   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
14335c5985e7SKris Buschelman   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
14345c5985e7SKris Buschelman   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
143552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1436a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1437a23d5eceSKris Buschelman }
1438a23d5eceSKris Buschelman EXTERN_C_END
1439a23d5eceSKris Buschelman 
14407878bbefSBarry Smith EXTERN_C_BEGIN
14417878bbefSBarry Smith #if defined(PETSC_HAVE_PLAPACK)
14427878bbefSBarry Smith #undef __FUNCT__
14437878bbefSBarry Smith #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
14447878bbefSBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
14457878bbefSBarry Smith {
14467878bbefSBarry Smith   PetscFunctionBegin;
14477878bbefSBarry Smith   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */
14487878bbefSBarry Smith   PetscFunctionReturn(0);
14497878bbefSBarry Smith }
14507878bbefSBarry Smith #endif
14517a667e6fSSatish Balay EXTERN_C_END
14527878bbefSBarry Smith 
14530bad9183SKris Buschelman /*MC
1454fafad747SKris Buschelman    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
14550bad9183SKris Buschelman 
14560bad9183SKris Buschelman    Options Database Keys:
14570bad9183SKris Buschelman . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
14580bad9183SKris Buschelman 
14590bad9183SKris Buschelman   Level: beginner
14600bad9183SKris Buschelman 
14617878bbefSBarry Smith   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
14627878bbefSBarry Smith   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
14637878bbefSBarry Smith   (run config/configure.py with the option --download-plapack)
14647878bbefSBarry Smith 
14657878bbefSBarry Smith 
14667878bbefSBarry Smith   Options Database Keys:
14677878bbefSBarry Smith . -mat_plapack_nprows <n> - number of rows in processor partition
14687878bbefSBarry Smith . -mat_plapack_npcols <n> - number of columns in processor partition
14697878bbefSBarry Smith . -mat_plapack_nb <n> - block size of template vector
14707878bbefSBarry Smith . -mat_plapack_nb_alg <n> - algorithmic block size
14717878bbefSBarry Smith - -mat_plapack_ckerror <n> - error checking flag
14727878bbefSBarry Smith 
14737878bbefSBarry Smith .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
14740bad9183SKris Buschelman M*/
14750bad9183SKris Buschelman 
1476a23d5eceSKris Buschelman EXTERN_C_BEGIN
1477a23d5eceSKris Buschelman #undef __FUNCT__
14784a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIDense"
1479be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1480273d9f13SBarry Smith {
1481273d9f13SBarry Smith   Mat_MPIDense   *a;
1482dfbe8321SBarry Smith   PetscErrorCode ierr;
148301b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
148401b82886SBarry Smith   Mat_Plapack    *lu;
148501b82886SBarry Smith #endif
1486273d9f13SBarry Smith 
1487273d9f13SBarry Smith   PetscFunctionBegin;
148838f2d2fdSLisandro Dalcin   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1489b0a32e0cSBarry Smith   mat->data         = (void*)a;
1490273d9f13SBarry Smith   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1491273d9f13SBarry Smith   mat->factor       = 0;
1492273d9f13SBarry Smith   mat->mapping      = 0;
1493273d9f13SBarry Smith 
1494273d9f13SBarry Smith   mat->insertmode = NOT_SET_VALUES;
14957adad957SLisandro Dalcin   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
14967adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1497273d9f13SBarry Smith 
1498899cda47SBarry Smith   mat->rmap.bs = mat->cmap.bs = 1;
14996148ca0dSBarry Smith   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
15006148ca0dSBarry Smith   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1501899cda47SBarry Smith   a->nvec = mat->cmap.n;
1502273d9f13SBarry Smith 
1503273d9f13SBarry Smith   /* build cache for off array entries formed */
1504273d9f13SBarry Smith   a->donotstash = PETSC_FALSE;
15057adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1506273d9f13SBarry Smith 
1507273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
1508273d9f13SBarry Smith   a->lvec        = 0;
1509273d9f13SBarry Smith   a->Mvctx       = 0;
1510273d9f13SBarry Smith   a->roworiented = PETSC_TRUE;
1511273d9f13SBarry Smith 
1512273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1513273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIDense",
1514273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1515a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1516a23d5eceSKris Buschelman                                      "MatMPIDenseSetPreallocation_MPIDense",
1517a23d5eceSKris Buschelman                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
15184ae313f4SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
15194ae313f4SHong Zhang                                      "MatMatMult_MPIAIJ_MPIDense",
15204ae313f4SHong Zhang                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
15214ae313f4SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
15224ae313f4SHong Zhang                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
15234ae313f4SHong Zhang                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
15244ae313f4SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
15254ae313f4SHong Zhang                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
15264ae313f4SHong Zhang                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
15277878bbefSBarry Smith #if defined(PETSC_HAVE_PLAPACK)
15287878bbefSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
15297878bbefSBarry Smith                                      "MatSetSolverType_MPIDense_PLAPACK",
15307878bbefSBarry Smith                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
15317878bbefSBarry Smith #endif
153238aed534SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
153301b82886SBarry Smith 
153401b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
153501b82886SBarry Smith   ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
153601b82886SBarry Smith   lu->CleanUpPlapack       = PETSC_FALSE;
153701b82886SBarry Smith   mat->spptr                 = (void*)lu;
153801b82886SBarry Smith #endif
1539273d9f13SBarry Smith   PetscFunctionReturn(0);
1540273d9f13SBarry Smith }
1541273d9f13SBarry Smith EXTERN_C_END
1542273d9f13SBarry Smith 
1543209238afSKris Buschelman /*MC
1544002d173eSKris Buschelman    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1545209238afSKris Buschelman 
1546209238afSKris Buschelman    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1547209238afSKris Buschelman    and MATMPIDENSE otherwise.
1548209238afSKris Buschelman 
1549209238afSKris Buschelman    Options Database Keys:
1550209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1551209238afSKris Buschelman 
1552209238afSKris Buschelman   Level: beginner
1553209238afSKris Buschelman 
155401b82886SBarry Smith 
1555209238afSKris Buschelman .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1556209238afSKris Buschelman M*/
1557209238afSKris Buschelman 
1558209238afSKris Buschelman EXTERN_C_BEGIN
1559209238afSKris Buschelman #undef __FUNCT__
1560209238afSKris Buschelman #define __FUNCT__ "MatCreate_Dense"
1561be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1562dfbe8321SBarry Smith {
15636849ba73SBarry Smith   PetscErrorCode ierr;
156413f74950SBarry Smith   PetscMPIInt    size;
1565209238afSKris Buschelman 
1566209238afSKris Buschelman   PetscFunctionBegin;
15677adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1568209238afSKris Buschelman   if (size == 1) {
1569209238afSKris Buschelman     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1570209238afSKris Buschelman   } else {
1571209238afSKris Buschelman     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1572209238afSKris Buschelman   }
1573209238afSKris Buschelman   PetscFunctionReturn(0);
1574209238afSKris Buschelman }
1575209238afSKris Buschelman EXTERN_C_END
1576209238afSKris Buschelman 
15774a2ae208SSatish Balay #undef __FUNCT__
15784a2ae208SSatish Balay #define __FUNCT__ "MatMPIDenseSetPreallocation"
1579273d9f13SBarry Smith /*@C
1580273d9f13SBarry Smith    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1581273d9f13SBarry Smith 
1582273d9f13SBarry Smith    Not collective
1583273d9f13SBarry Smith 
1584273d9f13SBarry Smith    Input Parameters:
1585273d9f13SBarry Smith .  A - the matrix
1586273d9f13SBarry Smith -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1587273d9f13SBarry Smith    to control all matrix memory allocation.
1588273d9f13SBarry Smith 
1589273d9f13SBarry Smith    Notes:
1590273d9f13SBarry Smith    The dense format is fully compatible with standard Fortran 77
1591273d9f13SBarry Smith    storage by columns.
1592273d9f13SBarry Smith 
1593273d9f13SBarry Smith    The data input variable is intended primarily for Fortran programmers
1594273d9f13SBarry Smith    who wish to allocate their own matrix memory space.  Most users should
1595273d9f13SBarry Smith    set data=PETSC_NULL.
1596273d9f13SBarry Smith 
1597273d9f13SBarry Smith    Level: intermediate
1598273d9f13SBarry Smith 
1599273d9f13SBarry Smith .keywords: matrix,dense, parallel
1600273d9f13SBarry Smith 
1601273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1602273d9f13SBarry Smith @*/
1603be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1604273d9f13SBarry Smith {
1605dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1606273d9f13SBarry Smith 
1607273d9f13SBarry Smith   PetscFunctionBegin;
1608565567f0SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1609a23d5eceSKris Buschelman   if (f) {
1610a23d5eceSKris Buschelman     ierr = (*f)(mat,data);CHKERRQ(ierr);
1611a23d5eceSKris Buschelman   }
1612273d9f13SBarry Smith   PetscFunctionReturn(0);
1613273d9f13SBarry Smith }
1614273d9f13SBarry Smith 
16154a2ae208SSatish Balay #undef __FUNCT__
16164a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIDense"
16178965ea79SLois Curfman McInnes /*@C
161839ddd567SLois Curfman McInnes    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
16198965ea79SLois Curfman McInnes 
1620db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1621db81eaa0SLois Curfman McInnes 
16228965ea79SLois Curfman McInnes    Input Parameters:
1623db81eaa0SLois Curfman McInnes +  comm - MPI communicator
16248965ea79SLois Curfman McInnes .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1625db81eaa0SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
16268965ea79SLois Curfman McInnes .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1627db81eaa0SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
16287f5ff6fdSBarry Smith -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1629dfc5480cSLois Curfman McInnes    to control all matrix memory allocation.
16308965ea79SLois Curfman McInnes 
16318965ea79SLois Curfman McInnes    Output Parameter:
1632477f1c0bSLois Curfman McInnes .  A - the matrix
16338965ea79SLois Curfman McInnes 
1634b259b22eSLois Curfman McInnes    Notes:
163539ddd567SLois Curfman McInnes    The dense format is fully compatible with standard Fortran 77
163639ddd567SLois Curfman McInnes    storage by columns.
16378965ea79SLois Curfman McInnes 
163818f449edSLois Curfman McInnes    The data input variable is intended primarily for Fortran programmers
163918f449edSLois Curfman McInnes    who wish to allocate their own matrix memory space.  Most users should
16407f5ff6fdSBarry Smith    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
164118f449edSLois Curfman McInnes 
16428965ea79SLois Curfman McInnes    The user MUST specify either the local or global matrix dimensions
16438965ea79SLois Curfman McInnes    (possibly both).
16448965ea79SLois Curfman McInnes 
1645027ccd11SLois Curfman McInnes    Level: intermediate
1646027ccd11SLois Curfman McInnes 
164739ddd567SLois Curfman McInnes .keywords: matrix,dense, parallel
16488965ea79SLois Curfman McInnes 
164939ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
16508965ea79SLois Curfman McInnes @*/
1651be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
16528965ea79SLois Curfman McInnes {
16536849ba73SBarry Smith   PetscErrorCode ierr;
165413f74950SBarry Smith   PetscMPIInt    size;
16558965ea79SLois Curfman McInnes 
16563a40ed3dSBarry Smith   PetscFunctionBegin;
1657f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1658f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1659273d9f13SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1660273d9f13SBarry Smith   if (size > 1) {
1661273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1662273d9f13SBarry Smith     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1663273d9f13SBarry Smith   } else {
1664273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1665273d9f13SBarry Smith     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
16668c469469SLois Curfman McInnes   }
16673a40ed3dSBarry Smith   PetscFunctionReturn(0);
16688965ea79SLois Curfman McInnes }
16698965ea79SLois Curfman McInnes 
16704a2ae208SSatish Balay #undef __FUNCT__
16714a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIDense"
16726849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
16738965ea79SLois Curfman McInnes {
16748965ea79SLois Curfman McInnes   Mat            mat;
16753501a2bdSLois Curfman McInnes   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1676dfbe8321SBarry Smith   PetscErrorCode ierr;
16778965ea79SLois Curfman McInnes 
16783a40ed3dSBarry Smith   PetscFunctionBegin;
16798965ea79SLois Curfman McInnes   *newmat       = 0;
16807adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1681899cda47SBarry Smith   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
16827adad957SLisandro Dalcin   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1683834f8fabSBarry Smith   a                 = (Mat_MPIDense*)mat->data;
1684e04c1aa4SHong Zhang   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
16853501a2bdSLois Curfman McInnes   mat->factor       = A->factor;
1686c456f294SBarry Smith   mat->assembled    = PETSC_TRUE;
1687273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
16888965ea79SLois Curfman McInnes 
1689899cda47SBarry Smith   mat->rmap.rstart     = A->rmap.rstart;
1690899cda47SBarry Smith   mat->rmap.rend       = A->rmap.rend;
16918965ea79SLois Curfman McInnes   a->size              = oldmat->size;
16928965ea79SLois Curfman McInnes   a->rank              = oldmat->rank;
1693e0fa3b82SLois Curfman McInnes   mat->insertmode      = NOT_SET_VALUES;
1694b9b97703SBarry Smith   a->nvec              = oldmat->nvec;
16953782ba37SSatish Balay   a->donotstash        = oldmat->donotstash;
1696e04c1aa4SHong Zhang 
1697899cda47SBarry Smith   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1698899cda47SBarry Smith   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
16997adad957SLisandro Dalcin   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
17008965ea79SLois Curfman McInnes 
1701329f5518SBarry Smith   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
17025609ef8eSBarry Smith   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
170352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
170401b82886SBarry Smith 
170501b82886SBarry Smith #if defined(PETSC_HAVE_PLAPACK)
170601b82886SBarry Smith   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
170701b82886SBarry Smith #endif
17088965ea79SLois Curfman McInnes   *newmat = mat;
17093a40ed3dSBarry Smith   PetscFunctionReturn(0);
17108965ea79SLois Curfman McInnes }
17118965ea79SLois Curfman McInnes 
1712e090d566SSatish Balay #include "petscsys.h"
17138965ea79SLois Curfman McInnes 
17144a2ae208SSatish Balay #undef __FUNCT__
17154a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1716f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
171790ace30eSBarry Smith {
17186849ba73SBarry Smith   PetscErrorCode ierr;
171932dcc486SBarry Smith   PetscMPIInt    rank,size;
172013f74950SBarry Smith   PetscInt       *rowners,i,m,nz,j;
172187828ca2SBarry Smith   PetscScalar    *array,*vals,*vals_ptr;
172290ace30eSBarry Smith   MPI_Status     status;
172390ace30eSBarry Smith 
17243a40ed3dSBarry Smith   PetscFunctionBegin;
1725d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1726d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
172790ace30eSBarry Smith 
172890ace30eSBarry Smith   /* determine ownership of all rows */
172990ace30eSBarry Smith   m          = M/size + ((M % size) > rank);
173013f74950SBarry Smith   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
173113f74950SBarry Smith   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
173290ace30eSBarry Smith   rowners[0] = 0;
173390ace30eSBarry Smith   for (i=2; i<=size; i++) {
173490ace30eSBarry Smith     rowners[i] += rowners[i-1];
173590ace30eSBarry Smith   }
173690ace30eSBarry Smith 
1737f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1738f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1739be5d1d56SKris Buschelman   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1740878740d9SKris Buschelman   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
174190ace30eSBarry Smith   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
174290ace30eSBarry Smith 
174390ace30eSBarry Smith   if (!rank) {
174487828ca2SBarry Smith     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
174590ace30eSBarry Smith 
174690ace30eSBarry Smith     /* read in my part of the matrix numerical values  */
17470752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
174890ace30eSBarry Smith 
174990ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
175090ace30eSBarry Smith     vals_ptr = vals;
175190ace30eSBarry Smith     for (i=0; i<m; i++) {
175290ace30eSBarry Smith       for (j=0; j<N; j++) {
175390ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
175490ace30eSBarry Smith       }
175590ace30eSBarry Smith     }
175690ace30eSBarry Smith 
175790ace30eSBarry Smith     /* read in other processors and ship out */
175890ace30eSBarry Smith     for (i=1; i<size; i++) {
175990ace30eSBarry Smith       nz   = (rowners[i+1] - rowners[i])*N;
17600752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
17617adad957SLisandro Dalcin       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
176290ace30eSBarry Smith     }
17633a40ed3dSBarry Smith   } else {
176490ace30eSBarry Smith     /* receive numeric values */
176587828ca2SBarry Smith     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
176690ace30eSBarry Smith 
176790ace30eSBarry Smith     /* receive message of values*/
17687adad957SLisandro Dalcin     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
176990ace30eSBarry Smith 
177090ace30eSBarry Smith     /* insert into matrix-by row (this is why cannot directly read into array */
177190ace30eSBarry Smith     vals_ptr = vals;
177290ace30eSBarry Smith     for (i=0; i<m; i++) {
177390ace30eSBarry Smith       for (j=0; j<N; j++) {
177490ace30eSBarry Smith         array[i + j*m] = *vals_ptr++;
177590ace30eSBarry Smith       }
177690ace30eSBarry Smith     }
177790ace30eSBarry Smith   }
1778606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
1779606d414cSSatish Balay   ierr = PetscFree(vals);CHKERRQ(ierr);
17806d4a8577SBarry Smith   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17816d4a8577SBarry Smith   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17823a40ed3dSBarry Smith   PetscFunctionReturn(0);
178390ace30eSBarry Smith }
178490ace30eSBarry Smith 
17854a2ae208SSatish Balay #undef __FUNCT__
17864a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIDense"
1787f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
17888965ea79SLois Curfman McInnes {
17898965ea79SLois Curfman McInnes   Mat            A;
179087828ca2SBarry Smith   PetscScalar    *vals,*svals;
179119bcc07fSBarry Smith   MPI_Comm       comm = ((PetscObject)viewer)->comm;
17928965ea79SLois Curfman McInnes   MPI_Status     status;
179313f74950SBarry Smith   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
179413f74950SBarry Smith   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
179513f74950SBarry Smith   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
179613f74950SBarry Smith   PetscInt       i,nz,j,rstart,rend;
179713f74950SBarry Smith   int            fd;
17986849ba73SBarry Smith   PetscErrorCode ierr;
17998965ea79SLois Curfman McInnes 
18003a40ed3dSBarry Smith   PetscFunctionBegin;
1801d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1802d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
18038965ea79SLois Curfman McInnes   if (!rank) {
1804b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
18050752156aSBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1806552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
18078965ea79SLois Curfman McInnes   }
18088965ea79SLois Curfman McInnes 
180913f74950SBarry Smith   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
181090ace30eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
181190ace30eSBarry Smith 
181290ace30eSBarry Smith   /*
181390ace30eSBarry Smith        Handle case where matrix is stored on disk as a dense matrix
181490ace30eSBarry Smith   */
181590ace30eSBarry Smith   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1816be5d1d56SKris Buschelman     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
18173a40ed3dSBarry Smith     PetscFunctionReturn(0);
181890ace30eSBarry Smith   }
181990ace30eSBarry Smith 
18208965ea79SLois Curfman McInnes   /* determine ownership of all rows */
1821e44c0bd4SBarry Smith   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1822e44c0bd4SBarry Smith   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1823ca161407SBarry Smith   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
18248965ea79SLois Curfman McInnes   rowners[0] = 0;
18258965ea79SLois Curfman McInnes   for (i=2; i<=size; i++) {
18268965ea79SLois Curfman McInnes     rowners[i] += rowners[i-1];
18278965ea79SLois Curfman McInnes   }
18288965ea79SLois Curfman McInnes   rstart = rowners[rank];
18298965ea79SLois Curfman McInnes   rend   = rowners[rank+1];
18308965ea79SLois Curfman McInnes 
18318965ea79SLois Curfman McInnes   /* distribute row lengths to all processors */
183213f74950SBarry Smith   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
18338965ea79SLois Curfman McInnes   offlens = ourlens + (rend-rstart);
18348965ea79SLois Curfman McInnes   if (!rank) {
183513f74950SBarry Smith     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
18360752156aSBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
183713f74950SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
18388965ea79SLois Curfman McInnes     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
18391466f1e1SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1840606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1841ca161407SBarry Smith   } else {
18421466f1e1SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
18438965ea79SLois Curfman McInnes   }
18448965ea79SLois Curfman McInnes 
18458965ea79SLois Curfman McInnes   if (!rank) {
18468965ea79SLois Curfman McInnes     /* calculate the number of nonzeros on each processor */
184713f74950SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
184813f74950SBarry Smith     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
18498965ea79SLois Curfman McInnes     for (i=0; i<size; i++) {
18508965ea79SLois Curfman McInnes       for (j=rowners[i]; j< rowners[i+1]; j++) {
18518965ea79SLois Curfman McInnes         procsnz[i] += rowlengths[j];
18528965ea79SLois Curfman McInnes       }
18538965ea79SLois Curfman McInnes     }
1854606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
18558965ea79SLois Curfman McInnes 
18568965ea79SLois Curfman McInnes     /* determine max buffer needed and allocate it */
18578965ea79SLois Curfman McInnes     maxnz = 0;
18588965ea79SLois Curfman McInnes     for (i=0; i<size; i++) {
18590452661fSBarry Smith       maxnz = PetscMax(maxnz,procsnz[i]);
18608965ea79SLois Curfman McInnes     }
186113f74950SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
18628965ea79SLois Curfman McInnes 
18638965ea79SLois Curfman McInnes     /* read in my part of the matrix column indices  */
18648965ea79SLois Curfman McInnes     nz = procsnz[0];
186513f74950SBarry Smith     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
18660752156aSBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
18678965ea79SLois Curfman McInnes 
18688965ea79SLois Curfman McInnes     /* read in every one elses and ship off */
18698965ea79SLois Curfman McInnes     for (i=1; i<size; i++) {
18708965ea79SLois Curfman McInnes       nz   = procsnz[i];
18710752156aSBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
187213f74950SBarry Smith       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
18738965ea79SLois Curfman McInnes     }
1874606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
18753a40ed3dSBarry Smith   } else {
18768965ea79SLois Curfman McInnes     /* determine buffer space needed for message */
18778965ea79SLois Curfman McInnes     nz = 0;
18788965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
18798965ea79SLois Curfman McInnes       nz += ourlens[i];
18808965ea79SLois Curfman McInnes     }
188113f74950SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
18828965ea79SLois Curfman McInnes 
18838965ea79SLois Curfman McInnes     /* receive message of column indices*/
188413f74950SBarry Smith     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
188513f74950SBarry Smith     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
188629bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
18878965ea79SLois Curfman McInnes   }
18888965ea79SLois Curfman McInnes 
18898965ea79SLois Curfman McInnes   /* loop over local rows, determining number of off diagonal entries */
189013f74950SBarry Smith   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
18918965ea79SLois Curfman McInnes   jj = 0;
18928965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
18938965ea79SLois Curfman McInnes     for (j=0; j<ourlens[i]; j++) {
18948965ea79SLois Curfman McInnes       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
18958965ea79SLois Curfman McInnes       jj++;
18968965ea79SLois Curfman McInnes     }
18978965ea79SLois Curfman McInnes   }
18988965ea79SLois Curfman McInnes 
18998965ea79SLois Curfman McInnes   /* create our matrix */
19008965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
19018965ea79SLois Curfman McInnes     ourlens[i] -= offlens[i];
19028965ea79SLois Curfman McInnes   }
1903f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1904f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1905be5d1d56SKris Buschelman   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1906878740d9SKris Buschelman   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
19078965ea79SLois Curfman McInnes   A = *newmat;
19088965ea79SLois Curfman McInnes   for (i=0; i<m; i++) {
19098965ea79SLois Curfman McInnes     ourlens[i] += offlens[i];
19108965ea79SLois Curfman McInnes   }
19118965ea79SLois Curfman McInnes 
19128965ea79SLois Curfman McInnes   if (!rank) {
191387828ca2SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
19148965ea79SLois Curfman McInnes 
19158965ea79SLois Curfman McInnes     /* read in my part of the matrix numerical values  */
19168965ea79SLois Curfman McInnes     nz = procsnz[0];
19170752156aSBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
19188965ea79SLois Curfman McInnes 
19198965ea79SLois Curfman McInnes     /* insert into matrix */
19208965ea79SLois Curfman McInnes     jj      = rstart;
19218965ea79SLois Curfman McInnes     smycols = mycols;
19228965ea79SLois Curfman McInnes     svals   = vals;
19238965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
19248965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
19258965ea79SLois Curfman McInnes       smycols += ourlens[i];
19268965ea79SLois Curfman McInnes       svals   += ourlens[i];
19278965ea79SLois Curfman McInnes       jj++;
19288965ea79SLois Curfman McInnes     }
19298965ea79SLois Curfman McInnes 
19308965ea79SLois Curfman McInnes     /* read in other processors and ship out */
19318965ea79SLois Curfman McInnes     for (i=1; i<size; i++) {
19328965ea79SLois Curfman McInnes       nz   = procsnz[i];
19330752156aSBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
19347adad957SLisandro Dalcin       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
19358965ea79SLois Curfman McInnes     }
1936606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
19373a40ed3dSBarry Smith   } else {
19388965ea79SLois Curfman McInnes     /* receive numeric values */
193984d528b1SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
19408965ea79SLois Curfman McInnes 
19418965ea79SLois Curfman McInnes     /* receive message of values*/
19427adad957SLisandro Dalcin     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1943ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
194429bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
19458965ea79SLois Curfman McInnes 
19468965ea79SLois Curfman McInnes     /* insert into matrix */
19478965ea79SLois Curfman McInnes     jj      = rstart;
19488965ea79SLois Curfman McInnes     smycols = mycols;
19498965ea79SLois Curfman McInnes     svals   = vals;
19508965ea79SLois Curfman McInnes     for (i=0; i<m; i++) {
19518965ea79SLois Curfman McInnes       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
19528965ea79SLois Curfman McInnes       smycols += ourlens[i];
19538965ea79SLois Curfman McInnes       svals   += ourlens[i];
19548965ea79SLois Curfman McInnes       jj++;
19558965ea79SLois Curfman McInnes     }
19568965ea79SLois Curfman McInnes   }
1957606d414cSSatish Balay   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1958606d414cSSatish Balay   ierr = PetscFree(vals);CHKERRQ(ierr);
1959606d414cSSatish Balay   ierr = PetscFree(mycols);CHKERRQ(ierr);
1960606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
19618965ea79SLois Curfman McInnes 
19626d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19636d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19643a40ed3dSBarry Smith   PetscFunctionReturn(0);
19658965ea79SLois Curfman McInnes }
196690ace30eSBarry Smith 
19676e4ee0c6SHong Zhang #undef __FUNCT__
19686e4ee0c6SHong Zhang #define __FUNCT__ "MatEqual_MPIDense"
19696e4ee0c6SHong Zhang PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
19706e4ee0c6SHong Zhang {
19716e4ee0c6SHong Zhang   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
19726e4ee0c6SHong Zhang   Mat            a,b;
19736e4ee0c6SHong Zhang   PetscTruth     flg;
19746e4ee0c6SHong Zhang   PetscErrorCode ierr;
197590ace30eSBarry Smith 
19766e4ee0c6SHong Zhang   PetscFunctionBegin;
19776e4ee0c6SHong Zhang   a = matA->A;
19786e4ee0c6SHong Zhang   b = matB->A;
19796e4ee0c6SHong Zhang   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
19807adad957SLisandro Dalcin   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
19816e4ee0c6SHong Zhang   PetscFunctionReturn(0);
19826e4ee0c6SHong Zhang }
198390ace30eSBarry Smith 
198490ace30eSBarry Smith 
198590ace30eSBarry Smith 
1986